-
# Base class for all ActionCable channels in this app; concrete channels
# (e.g. OperationsChannel) inherit from here so shared channel behavior
# has a single home.
module ApplicationCable
  class Channel < ActionCable::Channel::Base
  end
end
-
module ApplicationCable
  # WebSocket connection shared by all channels.
  #
  # Identifiers:
  # - connection_id: random per-connection tag (8 hex bytes).
  # - current_account_id: the InstagramAccount this socket is scoped to,
  #   or nil when none could be resolved.
  class Connection < ActionCable::Connection::Base
    identified_by :connection_id, :current_account_id

    def connect
      self.connection_id = SecureRandom.hex(8)
      self.current_account_id = resolve_current_account_id
    rescue StandardError
      # Never refuse the socket over an account-resolution error; channels
      # that need an account must handle a nil current_account_id.
      self.current_account_id = nil
    end

    private

    # Prefers the account selected in the session (when it still exists);
    # otherwise falls back to the lowest-id account, or nil if none exist.
    def resolve_current_account_id
      selected_id = request.session[:instagram_account_id].to_i
      if selected_id.positive? && InstagramAccount.exists?(id: selected_id)
        return selected_id
      end

      # NOTE(review): `.limit(1)` is redundant — `pick` already limits to one row.
      InstagramAccount.order(:id).limit(1).pick(:id)
    end
  end
end
-
# Streams LLM comment-generation progress for a single Instagram account.
# NOTE(review): subscription only validates that the id is positive — it does
# not check that the connection is authorized for that account; confirm this
# is acceptable for the deployment.
class LlmCommentGenerationChannel < ApplicationCable::Channel
  # Subscribes to "llm_comment_generation_<account_id>"; rejects the
  # subscription when the requested account id is missing or non-positive.
  def subscribed
    account_id = params[:account_id].to_i
    return reject unless account_id.positive?

    stream_from "llm_comment_generation_#{account_id}"
  end

  def unsubscribed
    # Any cleanup needed when channel is unsubscribed
  end
end
-
# Streams live operations updates, scoped to an account and/or the global
# feed published by Ops::LiveUpdateBroadcaster.
class OperationsChannel < ApplicationCable::Channel
  def subscribed
    requested = params[:account_id].to_i
    fallback = current_account_id.to_i
    account_id = requested.positive? ? requested : fallback

    # Per-account stream when any account could be resolved.
    stream_from Ops::LiveUpdateBroadcaster.account_stream(account_id) if account_id.positive?

    # Global stream is opted into explicitly, or used as the default when
    # no account could be resolved at all.
    stream_from Ops::LiveUpdateBroadcaster.global_stream if truthy?(params[:include_global]) || account_id <= 0
  end

  private

  # Loose boolean parsing for channel params ("1", "true", "yes", "on").
  def truthy?(raw)
    %w[1 true yes on].include?(raw.to_s.strip.downcase)
  end
end
-
class Admin::BackgroundJobsController < Admin::BaseController
-
# Renders the background-jobs dashboard for whichever queue backend is
# configured, then enriches the sampled jobs and loads recent audit rows.
def dashboard
  @backend = queue_backend

  @backend == "sidekiq" ? load_sidekiq_dashboard! : load_solid_queue_dashboard!
  attach_recent_job_details!

  @failure_logs = BackgroundJobFailure.recent_first.limit(100)
  @recent_issues = AppIssue.recent_first.limit(15)
  @recent_storage_ingestions = ActiveStorageIngestion.recent_first.limit(15)
end
-
-
# HTML/JSON index of BackgroundJobFailure rows for a Tabulator grid.
# Supports Tabulator header filters, free-text search (?q=), remote
# sorting, and pagination via page + per_page (or Tabulator's `size`).
def failures
  scope = BackgroundJobFailure.order(occurred_at: :desc, id: :desc)
  scope = apply_tabulator_filters(scope)

  @q = params[:q].to_s.strip
  if @q.present?
    # Case-insensitive LIKE across class, queue, error class and message;
    # the pattern is bound as a parameter, not interpolated into SQL.
    term = "%#{@q.downcase}%"
    scope = scope.where(
      "LOWER(job_class) LIKE ? OR LOWER(COALESCE(queue_name, '')) LIKE ? OR LOWER(error_class) LIKE ? OR LOWER(error_message) LIKE ?",
      term, term, term, term
    )
  end

  # apply_remote_sort returns nil for unknown sort fields; keep default order then.
  scope = apply_remote_sort(scope) || scope

  page = params.fetch(:page, 1).to_i
  page = 1 if page < 1

  per_page_param = params[:per_page].presence || params[:size].presence
  per_page = per_page_param.to_i
  per_page = 50 if per_page <= 0
  per_page = per_page.clamp(10, 200)

  total = scope.count
  pages = (total / per_page.to_f).ceil
  @failures = scope.offset((page - 1) * per_page).limit(per_page)

  respond_to do |format|
    format.html
    format.json do
      render json: tabulator_payload(failures: @failures, total: total, pages: pages)
    end
  end
end
-
-
# Detail page for a single persisted background-job failure.
# Raises ActiveRecord::RecordNotFound for unknown ids (404 via Rails).
def failure
  @failure = BackgroundJobFailure.find(params[:id])
end
-
-
# Re-enqueues the job recorded by a failure, then notifies live dashboards.
# Jobs::FailureRetry::RetryError (non-retryable failure) is reported to the
# user; any other error propagates.
def retry_failure
  failure = BackgroundJobFailure.find(params[:id])
  Jobs::FailureRetry.enqueue!(failure)

  # throttle_seconds: 0 — broadcast immediately, no coalescing.
  Ops::LiveUpdateBroadcaster.broadcast!(
    topic: "job_failures_changed",
    account_id: failure.instagram_account_id,
    payload: { action: "retry", failure_id: failure.id },
    throttle_key: "job_failures_changed",
    throttle_seconds: 0
  )

  respond_to do |format|
    format.html { redirect_to admin_background_job_failure_path(failure), notice: "Retry queued for #{failure.job_class}." }
    format.json { render json: { ok: true } }
  end
rescue Jobs::FailureRetry::RetryError => e
  respond_to do |format|
    format.html { redirect_to admin_background_job_failure_path(params[:id]), alert: e.message }
    format.json { render json: { ok: false, error: e.message }, status: :unprocessable_entity }
  end
end
-
-
# Destructive: stops workers and wipes every queued/scheduled/failed job
# for the active backend, then broadcasts so dashboards refresh at once.
def clear_all_jobs
  backend = queue_backend

  if backend == "sidekiq"
    clear_sidekiq_jobs!
  else
    clear_solid_queue_jobs!
  end

  Ops::LiveUpdateBroadcaster.broadcast!(
    topic: "jobs_changed",
    payload: { action: "clear_all" },
    throttle_key: "jobs_changed",
    throttle_seconds: 0
  )
  redirect_to admin_background_jobs_path, notice: "All jobs have been stopped and queue cleared successfully."
rescue StandardError => e
  # Surface the failure to the admin rather than 500ing the dashboard.
  redirect_to admin_background_jobs_path, alert: "Failed to clear jobs: #{e.message}"
end
-
-
private
-
-
# Name of the configured ActiveJob adapter ("sidekiq", "solid_queue", ...);
# "unknown" when the configuration cannot be read.
def queue_backend
  adapter = Rails.application.config.active_job.queue_adapter
  adapter.to_s
rescue StandardError
  "unknown"
end
-
-
# Populates dashboard ivars from Solid Queue tables. Every query runs
# through safe_count/safe_query so a missing table or DB error degrades to
# zeros/empty lists instead of raising.
def load_solid_queue_dashboard!
  @counts = {
    ready: safe_count { SolidQueue::ReadyExecution.count },
    scheduled: safe_count { SolidQueue::ScheduledExecution.count },
    claimed: safe_count { SolidQueue::ClaimedExecution.count },
    blocked: safe_count { SolidQueue::BlockedExecution.count },
    failed: safe_count { SolidQueue::FailedExecution.count },
    pauses: safe_count { SolidQueue::Pause.count },
    jobs_total: safe_count { SolidQueue::Job.count }
  }

  @processes = safe_query { SolidQueue::Process.order(last_heartbeat_at: :desc).limit(50).to_a } || []
  solid_jobs = safe_query { SolidQueue::Job.order(created_at: :desc).limit(100).to_a } || []
  @recent_jobs = solid_jobs.map { |job| serialize_solid_queue_job(job) }
  @recent_failed = safe_query do
    SolidQueue::FailedExecution
      .includes(:job)
      .order(created_at: :desc)
      .limit(50)
      .to_a
  end || []
end
-
-
# Populates dashboard ivars from the Sidekiq API: queue sizes, worker
# processes, a merged bounded sample of recent jobs, and recent failed
# (retry/dead) jobs. Any Sidekiq/Redis error falls through to the rescue
# at the bottom and renders an empty dashboard instead of raising.
def load_sidekiq_dashboard!
  require "sidekiq/api"

  queues = safe_query { Sidekiq::Queue.all } || []
  scheduled = Sidekiq::ScheduledSet.new
  retries = Sidekiq::RetrySet.new
  dead = Sidekiq::DeadSet.new
  processes = Sidekiq::ProcessSet.new

  queue_rows = queues.map { |queue| { name: queue.name, size: queue.size } }
  @counts = {
    enqueued: queue_rows.sum { |row| row[:size].to_i },
    scheduled: safe_count { scheduled.size },
    retries: safe_count { retries.size },
    dead: safe_count { dead.size },
    processes: safe_count { processes.size },
    queues: queue_rows
  }

  # Workers sorted newest-heartbeat first, capped at 50.
  @processes = safe_query do
    processes.map do |p|
      {
        identity: p["identity"],
        hostname: p["hostname"],
        pid: p["pid"],
        queues: Array(p["queues"]),
        labels: Array(p["labels"]),
        busy: p["busy"].to_i,
        beat: parse_time(p["beat"])
      }
    end.sort_by { |row| row[:beat] || Time.at(0) }.reverse.first(50)
  end || []

  # Sample a bounded number of jobs per state so the merged list stays small.
  enqueued_rows = queues.flat_map do |queue|
    queue.first(30).map { |job| serialize_sidekiq_job(job: job, status: "enqueued", queue_name: queue.name) }
  end
  scheduled_rows = scheduled.first(30).map { |job| serialize_sidekiq_job(job: job, status: "scheduled", queue_name: job.queue) }
  retry_rows = retries.first(20).map { |job| serialize_sidekiq_job(job: job, status: "retry", queue_name: job.queue) }
  dead_rows = dead.first(20).map { |job| serialize_sidekiq_job(job: job, status: "dead", queue_name: job.queue) }

  @recent_jobs = (enqueued_rows + scheduled_rows + retry_rows + dead_rows)
    .sort_by { |row| row[:created_at] || Time.at(0) }
    .reverse
    .first(100)

  @recent_failed = (retry_rows + dead_rows).first(50)
rescue StandardError
  # Redis down / Sidekiq misconfigured: degrade to an empty dashboard.
  @counts = { enqueued: 0, scheduled: 0, retries: 0, dead: 0, processes: 0, queues: [] }
  @processes = []
  @recent_jobs = []
  @recent_failed = []
end
-
-
# Normalizes one raw Sidekiq job payload into the row-hash shape shared
# with serialize_solid_queue_job. When the job is an ActiveJob wrapper,
# class name / arguments come from the wrapped job. Never raises: any
# malformed payload falls back to an "unknown" placeholder row.
def serialize_sidekiq_job(job:, status:, queue_name:)
  item = job.item.to_h
  wrapper = active_job_wrapper_from_sidekiq(item)
  context = Jobs::ContextExtractor.from_active_job_arguments(wrapper["arguments"] || item["args"])
  {
    # Sidekiq stores these as numeric epoch timestamps.
    created_at: parse_time(item["created_at"] || item["enqueued_at"] || item["at"]),
    class_name: wrapper["job_class"].presence || item["wrapped"].presence || item["class"].to_s,
    queue_name: queue_name.to_s,
    status: status,
    jid: item["jid"].to_s,
    active_job_id: wrapper["job_id"].to_s.presence,
    provider_job_id: wrapper["provider_job_id"].to_s.presence || item["jid"].to_s.presence,
    error_message: item["error_message"].to_s.presence,
    job_scope: context[:job_scope],
    context_label: context[:context_label],
    instagram_account_id: context[:instagram_account_id],
    instagram_profile_id: context[:instagram_profile_id],
    arguments: wrapper["arguments"] || item["args"] || []
  }
rescue StandardError
  # Placeholder so one bad payload cannot break the whole dashboard list.
  {
    created_at: nil,
    class_name: "unknown",
    queue_name: queue_name.to_s,
    status: status,
    jid: nil,
    active_job_id: nil,
    provider_job_id: nil,
    error_message: nil,
    job_scope: "system",
    context_label: "System",
    instagram_account_id: nil,
    instagram_profile_id: nil,
    arguments: []
  }
end
-
-
# Normalizes one SolidQueue::Job into the shared dashboard row shape.
# Heavy respond_to? guarding keeps this resilient across Solid Queue
# schema versions; any error falls back to an "unknown" placeholder row.
def serialize_solid_queue_job(job)
  args = job.respond_to?(:arguments) ? job.arguments : {}
  context = Jobs::ContextExtractor.from_solid_queue_job_arguments(args)

  # Coarse status derived from timestamps; anything not finished or
  # scheduled is assumed to be running or waiting in a queue.
  status =
    if job.respond_to?(:finished_at) && job.finished_at.present?
      "finished"
    elsif job.respond_to?(:scheduled_at) && job.scheduled_at.present?
      "scheduled"
    else
      "running/queued"
    end

  {
    created_at: (job.created_at if job.respond_to?(:created_at)),
    class_name: (job.class_name if job.respond_to?(:class_name)) || "unknown",
    queue_name: (job.queue_name if job.respond_to?(:queue_name)).to_s,
    status: status,
    jid: (job.id.to_s if job.respond_to?(:id)),
    active_job_id: (job.active_job_id.to_s if job.respond_to?(:active_job_id)).presence,
    provider_job_id: nil,
    error_message: nil,
    job_scope: context[:job_scope],
    context_label: context[:context_label],
    instagram_account_id: context[:instagram_account_id],
    instagram_profile_id: context[:instagram_profile_id],
    arguments: args || []
  }
rescue StandardError
  # Placeholder so one bad record cannot break the dashboard list.
  {
    created_at: nil,
    class_name: "unknown",
    queue_name: "",
    status: "unknown",
    jid: nil,
    active_job_id: nil,
    provider_job_id: nil,
    error_message: nil,
    job_scope: "system",
    context_label: "System",
    instagram_account_id: nil,
    instagram_profile_id: nil,
    arguments: []
  }
end
-
-
# Enriches each @recent_jobs row (mutated in place) with a :details hash
# built from records linked via the job's ActiveJob id: action logs,
# failures, storage ingestions, LLM events and AI API calls. All lookups
# are batch-loaded up front to avoid per-row queries.
def attach_recent_job_details!
  rows = Array(@recent_jobs)
  return if rows.empty?

  active_job_ids = rows.map { |row| row[:active_job_id].to_s.presence }.compact.uniq
  action_logs_by_job_id = load_action_logs_by_job_id(active_job_ids: active_job_ids)
  failures_by_job_id = load_failures_by_job_id(active_job_ids: active_job_ids)
  ingestions_by_job_id = load_ingestions_by_job_id(active_job_ids: active_job_ids)
  llm_events_by_job_id = load_llm_events_by_job_id(active_job_ids: active_job_ids)
  api_calls_by_job_id = load_api_calls_by_job_id(active_job_ids: active_job_ids)

  rows.each do |row|
    active_job_id = row[:active_job_id].to_s
    # Indexes hold newest-first lists; take the most recent record each.
    action_log = action_logs_by_job_id[active_job_id]&.first
    failure = failures_by_job_id[active_job_id]&.first
    direct_ingestions = ingestions_by_job_id[active_job_id] || []
    direct_llm_events = llm_events_by_job_id[active_job_id] || []
    direct_api_calls = api_calls_by_job_id[active_job_id] || []

    row[:details] = build_job_details(
      row: row,
      action_log: action_log,
      failure: failure,
      direct_ingestions: direct_ingestions,
      direct_llm_events: direct_llm_events,
      direct_api_calls: direct_api_calls
    )
  end
rescue StandardError
  # If the batch loading blows up, every row still gets a minimal payload.
  rows.each { |row| row[:details] = fallback_job_details(row: row) }
end
-
-
# Indexes InstagramProfileActionLog rows (newest first) by ActiveJob id.
# Returns {} when there is nothing to look up or the query fails.
def load_action_logs_by_job_id(active_job_ids:)
  return {} if active_job_ids.empty?

  logs = InstagramProfileActionLog
         .includes(:instagram_account, :instagram_profile)
         .where(active_job_id: active_job_ids)
         .order(created_at: :desc)
         .to_a
  logs.group_by { |log| log.active_job_id.to_s }
rescue StandardError
  {}
end
-
-
# Indexes BackgroundJobFailure rows (newest first) by ActiveJob id.
# Returns {} when there is nothing to look up or the query fails.
def load_failures_by_job_id(active_job_ids:)
  return {} if active_job_ids.empty?

  failures = BackgroundJobFailure
             .where(active_job_id: active_job_ids)
             .order(occurred_at: :desc, id: :desc)
             .to_a
  failures.group_by { |failure| failure.active_job_id.to_s }
rescue StandardError
  {}
end
-
-
# Indexes ActiveStorageIngestion rows (newest first, capped at 400) by the
# ActiveJob id that created them. Returns {} on empty input or query error.
def load_ingestions_by_job_id(active_job_ids:)
  return {} if active_job_ids.empty?

  ingestions = ActiveStorageIngestion
               .where(created_by_active_job_id: active_job_ids)
               .order(created_at: :desc, id: :desc)
               .limit(400)
               .to_a
  ingestions.group_by { |ingestion| ingestion.created_by_active_job_id.to_s }
rescue StandardError
  {}
end
-
-
# Indexes InstagramProfileEvent rows (most recently updated first, capped
# at 300) by the LLM comment job id. Returns {} on empty input or error.
def load_llm_events_by_job_id(active_job_ids:)
  return {} if active_job_ids.empty?

  events = InstagramProfileEvent
           .where(llm_comment_job_id: active_job_ids)
           .order(updated_at: :desc, id: :desc)
           .limit(300)
           .to_a
  events.group_by { |event| event.llm_comment_job_id.to_s }
rescue StandardError
  {}
end
-
-
# Indexes recent AiApiCall rows by the ActiveJob id stored in their
# metadata. The job id lives inside a JSON metadata blob (not a column),
# so we scan a bounded window of recent calls instead of querying by id.
def load_api_calls_by_job_id(active_job_ids:)
  return {} if active_job_ids.empty?

  recent_calls = AiApiCall.recent_first.limit(600).to_a
  recent_calls.each_with_object(Hash.new { |h, k| h[k] = [] }) do |call, index|
    metadata = call.metadata.is_a?(Hash) ? call.metadata : {}
    job_id = metadata["active_job_id"].to_s
    index[job_id] << call if job_id.present? && active_job_ids.include?(job_id)
  end
rescue StandardError
  {}
end
-
-
# Assembles the per-job :details payload: processing steps, final output,
# API responses, technical data and blob ingestions. Records directly
# linked by job id win; otherwise heuristics scoped by account/profile
# within the inferred time window fill the gaps. Falls back to a minimal
# payload on any error.
def build_job_details(row:, action_log:, failure:, direct_ingestions:, direct_llm_events:, direct_api_calls:)
  window = inferred_time_window(row: row, action_log: action_log, failure: failure)
  # `presence` treats [] as missing, triggering the window-based fallback.
  api_calls = direct_api_calls.presence || fallback_api_calls(row: row, window: window)
  ingestions = direct_ingestions.presence || fallback_ingestions(row: row, window: window)
  llm_events = direct_llm_events.presence || fallback_llm_events(row: row, window: window)
  ai_analyses = related_ai_analyses(row: row, action_log: action_log, window: window)
  story_rows = related_story_rows(row: row, window: window)

  processing_steps = build_processing_steps(
    row: row,
    action_log: action_log,
    failure: failure,
    api_calls: api_calls,
    ingestions: ingestions,
    llm_events: llm_events,
    ai_analyses: ai_analyses,
    story_rows: story_rows
  )

  final_output = build_final_output(row: row, action_log: action_log, failure: failure)
  technical_data = build_technical_data(action_log: action_log, llm_events: llm_events, ai_analyses: ai_analyses, story_rows: story_rows)

  {
    processing_steps: processing_steps,
    final_output: final_output,
    api_responses: api_calls.first(8).map { |call| serialize_api_call(call) },
    technical_data: technical_data,
    blobs: ingestions.first(10).map { |row_item| serialize_ingestion(row_item) }
  }
rescue StandardError
  fallback_job_details(row: row)
end
-
-
# Minimal details payload used when detailed lookups fail or nothing is
# linked to the job yet.
def fallback_job_details(row:)
  summary = row[:error_message].to_s.presence || "No final output captured yet."
  final_output = { status: row[:status].to_s, summary: summary }.compact

  {
    processing_steps: [ "No detailed processing records were linked to this job yet." ],
    final_output: final_output,
    api_responses: [],
    technical_data: [],
    blobs: []
  }
end
-
-
# Best-effort time Range around the job's execution, padded 20 minutes on
# each side, used to correlate records that lack a direct job-id link.
# Returns nil when no timestamps are known at all (or on any error).
def inferred_time_window(row:, action_log:, failure:)
  started_candidates = [
    action_log&.started_at,
    action_log&.occurred_at,
    row[:created_at],
    failure&.occurred_at
  ].compact
  ended_candidates = [
    action_log&.finished_at,
    failure&.occurred_at,
    row[:created_at]
  ].compact
  return nil if started_candidates.empty? && ended_candidates.empty?

  # Either list may be empty here (but not both); fall back across lists,
  # defaulting the end to start + 2 hours when no end timestamp exists.
  started_at = (started_candidates.min || ended_candidates.min) - 20.minutes
  ended_at = (ended_candidates.max || started_at + 2.hours) + 20.minutes
  started_at..ended_at
rescue StandardError
  nil
end
-
-
# Heuristic: recent AiApiCall rows for the job's account, optionally
# narrowed to the inferred time window. Empty when no account is known.
def fallback_api_calls(row:, window:)
  account_id = row[:instagram_account_id].to_i
  return [] unless account_id.positive?

  calls = AiApiCall.where(instagram_account_id: account_id).order(occurred_at: :desc, id: :desc)
  calls = calls.where(occurred_at: window) if window
  calls.limit(8).to_a
rescue StandardError
  []
end
-
-
# Heuristic: recent storage ingestions for the job's profile (preferred)
# or account, optionally narrowed to the inferred time window.
def fallback_ingestions(row:, window:)
  profile_id = row[:instagram_profile_id].to_i
  account_id = row[:instagram_account_id].to_i
  return [] unless profile_id.positive? || account_id.positive?

  scope = ActiveStorageIngestion.order(created_at: :desc, id: :desc)
  if profile_id.positive?
    scope = scope.where(instagram_profile_id: profile_id)
  elsif account_id.positive?
    scope = scope.where(instagram_account_id: account_id)
  end
  scope = scope.where(created_at: window) if window
  scope.limit(10).to_a
rescue StandardError
  []
end
-
-
# Heuristic: recent profile events that carry LLM/processing metadata for
# the job's profile, optionally narrowed to the inferred time window.
def fallback_llm_events(row:, window:)
  profile_id = row[:instagram_profile_id].to_i
  return [] unless profile_id.positive?

  events = InstagramProfileEvent.where(instagram_profile_id: profile_id).order(updated_at: :desc, id: :desc)
  events = events.where(updated_at: window) if window

  # Keep only events that actually carry LLM or processing metadata.
  events.limit(6).to_a.select do |event|
    llm_meta = event.llm_comment_metadata.is_a?(Hash) ? event.llm_comment_metadata : {}
    raw_meta = event.metadata.is_a?(Hash) ? event.metadata : {}
    llm_meta.present? || raw_meta["processing_metadata"].is_a?(Hash) || raw_meta["local_story_intelligence"].is_a?(Hash)
  end
rescue StandardError
  []
end
-
-
# Recent AiAnalysis rows plausibly produced by this job: scoped by
# account, optionally by time window, profile, and a purpose hint derived
# from the job class / action-log action.
def related_ai_analyses(row:, action_log:, window:)
  account_id = row[:instagram_account_id].to_i
  return [] unless account_id.positive?

  analyses = AiAnalysis.where(instagram_account_id: account_id).order(created_at: :desc, id: :desc)
  analyses = analyses.where(created_at: window) if window

  profile_id = row[:instagram_profile_id].to_i
  analyses = analyses.where(analyzable_type: "InstagramProfile", analyzable_id: profile_id) if profile_id.positive?

  hint = purpose_hint_for(row: row, action_log: action_log)
  analyses = analyses.where(purpose: hint) if hint.present?

  analyses.limit(6).to_a
rescue StandardError
  []
end
-
-
# Maps a job class name / action-log action to the AiAnalysis purpose it
# produces. Returns "post", "profile", or nil when no hint applies.
def purpose_hint_for(row:, action_log:)
  klass = row[:class_name].to_s
  action = action_log&.action.to_s

  if klass.include?("AnalyzeInstagramPostJob") || %w[capture_profile_posts analyze_profile_posts].include?(action)
    "post"
  elsif klass.include?("AnalyzeInstagramProfileJob") || action == "analyze_profile"
    "profile"
  end
end
-
-
# Recent stories for the job's profile that carry processing artifacts
# (processing metadata, generated suggestions, or content understanding).
def related_story_rows(row:, window:)
  profile_id = row[:instagram_profile_id].to_i
  return [] unless profile_id.positive?

  stories = InstagramStory.where(instagram_profile_id: profile_id).order(updated_at: :desc, id: :desc)
  stories = stories.where(updated_at: window) if window

  stories.limit(6).to_a.select do |story|
    metadata = story.metadata.is_a?(Hash) ? story.metadata : {}
    metadata["processing_metadata"].is_a?(Hash) ||
      metadata["generated_response_suggestions"].present? ||
      metadata["content_understanding"].is_a?(Hash)
  end
rescue StandardError
  []
end
-
-
# Human-readable timeline ("processing steps") for a job, assembled from
# whatever correlated records exist. Deduplicated and capped at 12 lines.
def build_processing_steps(row:, action_log:, failure:, api_calls:, ingestions:, llm_events:, ai_analyses:, story_rows:)
  steps = []
  if row[:created_at].present?
    steps << "Queued in #{row[:queue_name].to_s.presence || '-'} at #{row[:created_at].iso8601}."
  else
    steps << "Queued in #{row[:queue_name].to_s.presence || '-'}."
  end

  if action_log
    steps << "Action log '#{action_log.action}' recorded with status '#{action_log.status}'."
    steps << "Execution started at #{action_log.started_at.iso8601}." if action_log.started_at.present?
    steps << "Execution finished at #{action_log.finished_at.iso8601}." if action_log.finished_at.present?
  end
  steps << "Captured #{api_calls.length} related API call(s)." if api_calls.any?
  steps << "Generated #{ai_analyses.length} AI analysis record(s)." if ai_analyses.any?
  steps << "Updated #{llm_events.length} LLM/story event record(s)." if llm_events.any?
  steps << "Persisted #{story_rows.length} story processing artifact(s)." if story_rows.any?
  if failure
    # Error message truncated to 240 bytes to keep the step list compact.
    steps << "Failed at #{failure.occurred_at&.iso8601 || 'unknown time'} with #{failure.error_class}: #{failure.error_message.to_s.byteslice(0, 240)}"
  end

  steps.uniq.first(12)
end
-
-
# Final-output summary for a job row. Each field prefers the richest
# source available: action log, then failure record, then the raw row.
# `.compact` drops keys that resolved to nil.
def build_final_output(row:, action_log:, failure:)
  {
    status: action_log&.status.to_s.presence || (failure.present? ? "failed" : row[:status].to_s),
    summary: action_log&.log_text.to_s.presence || failure&.error_message.to_s.presence || row[:error_message].to_s.presence || "No final output captured yet.",
    error_class: failure&.error_class.to_s.presence,
    error_message: action_log&.error_message.to_s.presence || failure&.error_message.to_s.presence || row[:error_message].to_s.presence,
    metadata: compact_data(action_log&.metadata)
  }.compact
end
-
-
# Collects trimmed "technical data" payloads for the details drawer, one
# entry per source record (action log, profile events, AI analyses,
# stories). Each payload is passed through compact_data to bound size.
# Capped at 12 entries total.
def build_technical_data(action_log:, llm_events:, ai_analyses:, story_rows:)
  rows = []
  rows << {
    source: "profile_action_log",
    payload: compact_data(action_log.metadata)
  } if action_log&.metadata.is_a?(Hash)

  llm_events.first(4).each do |event|
    rows << {
      source: "instagram_profile_event",
      payload: {
        event_id: event.id,
        event_kind: event.kind,
        llm_comment_status: event.llm_comment_status,
        llm_comment_model: event.llm_comment_model,
        llm_comment_provider: event.llm_comment_provider,
        # Long generated text is truncated to 280 bytes for display.
        generated_comment: event.llm_generated_comment.to_s.presence&.byteslice(0, 280),
        relevance_score: event.llm_comment_relevance_score,
        llm_comment_metadata: compact_data(event.llm_comment_metadata),
        metadata: compact_data(event.metadata)
      }.compact
    }
  end

  ai_analyses.first(4).each do |analysis|
    rows << {
      source: "ai_analysis",
      payload: {
        analysis_id: analysis.id,
        purpose: analysis.purpose,
        provider: analysis.provider,
        model: analysis.model,
        status: analysis.status,
        started_at: analysis.started_at&.iso8601,
        finished_at: analysis.finished_at&.iso8601,
        response_excerpt: analysis.response_text.to_s.presence&.byteslice(0, 320),
        analysis: compact_data(analysis.analysis),
        metadata: compact_data(analysis.metadata)
      }.compact
    }
  end

  story_rows.first(4).each do |story|
    metadata = story.metadata.is_a?(Hash) ? story.metadata : {}
    rows << {
      source: "instagram_story",
      payload: {
        story_id: story.story_id,
        media_type: story.media_type,
        processing_status: story.processing_status,
        processed: story.processed,
        processed_at: story.processed_at&.iso8601,
        # Only whitelisted processing-related metadata keys are exposed.
        metadata: compact_data(
          metadata.slice(
            "processing_metadata",
            "generated_response_suggestions",
            "content_understanding",
            "face_count",
            "content_signals",
            "ocr_text",
            "transcript",
            "object_detections",
            "scenes"
          )
        )
      }.compact
    }
  end

  rows.first(12)
end
-
-
# Display-shape hash for one AiApiCall; nil fields are dropped and the
# metadata blob is trimmed via compact_data.
def serialize_api_call(call)
  metadata = call.metadata.is_a?(Hash) ? call.metadata : {}
  {
    occurred_at: call.occurred_at&.iso8601,
    provider: call.provider,
    operation: call.operation,
    category: call.category,
    status: call.status,
    http_status: call.http_status,
    latency_ms: call.latency_ms,
    input_tokens: call.input_tokens,
    output_tokens: call.output_tokens,
    total_tokens: call.total_tokens,
    error_message: call.error_message.to_s.presence,
    metadata: compact_data(metadata)
  }.compact
end
-
-
# Display-shape hash for one ActiveStorageIngestion (blob/file record);
# nil fields are dropped.
def serialize_ingestion(row)
  {
    created_at: row.created_at&.iso8601,
    attachment_name: row.attachment_name,
    record_type: row.record_type,
    record_id: row.record_id,
    blob_filename: row.blob_filename,
    blob_content_type: row.blob_content_type,
    blob_byte_size: row.blob_byte_size,
    metadata: compact_data(row.metadata)
  }.compact
end
-
-
# Recursively trims arbitrary metadata for display: depth-limited to
# max_depth (deeper values become "[depth_limit]"), hashes capped at 20
# keys, arrays at 10 items, strings at 320 bytes. Blank values are
# dropped — except false and 0, which are meaningful and kept. On any
# error the value's string form (truncated) is returned instead.
def compact_data(value, depth: 0, max_depth: 3)
  return nil if value.nil?
  return "[depth_limit]" if depth >= max_depth

  case value
  when Hash
    compacted = {}
    value.to_h.each do |key, item|
      normalized = compact_data(item, depth: depth + 1, max_depth: max_depth)
      # Keep false/0 despite being `blank?`; drop everything else blank.
      next if normalized.blank? && normalized != false && normalized != 0

      compacted[key.to_s] = normalized
      break if compacted.length >= 20
    end
    compacted
  when Array
    value.first(10).map { |item| compact_data(item, depth: depth + 1, max_depth: max_depth) }.compact
  when String
    text = value.to_s.strip
    return nil if text.blank?

    text.byteslice(0, 320)
  when Time, Date, DateTime
    value.iso8601
  else
    # Numbers, booleans, symbols, etc. pass through untouched.
    value
  end
rescue StandardError
  value.to_s.byteslice(0, 320)
end
-
-
# Extracts the ActiveJob wrapper hash from a raw Sidekiq payload.
# ActiveJob enqueues through a wrapper whose serialized job is args[0];
# returns {} for plain Sidekiq jobs or malformed payloads.
def active_job_wrapper_from_sidekiq(item)
  candidate = Array(item["args"]).first
  return {} unless candidate.respond_to?(:to_h)

  wrapper = candidate.to_h
  wrapper["job_class"].present? ? wrapper : {}
rescue StandardError
  {}
end
-
-
# Best-effort conversion of a queue-backend timestamp into a Time.
#
# Accepts nil/blank (=> nil), a Time (returned as-is), a numeric epoch or
# numeric string (Sidekiq stores Float epochs), or a parseable time string
# such as ISO8601. Returns nil for anything unparseable.
#
# Fixes a silent mis-parse in the original: `Time.at(value.to_f)` turned a
# non-numeric string like "2024-01-02T03:04:05Z" into Time.at(2024.0)
# (a few seconds past the epoch) instead of the intended timestamp.
def parse_time(value)
  return nil if value.nil? || value.to_s.strip.empty?
  return value if value.is_a?(Time)

  # Numeric epoch (Integer/Float or numeric string).
  numeric = Float(value, exception: false)
  return Time.at(numeric) if numeric

  Time.parse(value.to_s)
rescue StandardError
  nil
end
-
-
# Runs the given block and returns its value; any StandardError yields 0.
# Lets one broken backend counter degrade to zero instead of taking down
# the whole dashboard.
def safe_count
  begin
    yield
  rescue StandardError
    0
  end
end
-
-
# Runs the given block and returns its value; any StandardError yields nil
# (callers typically append `|| []`). Keeps one broken query from taking
# down the whole dashboard.
def safe_query
  begin
    yield
  rescue StandardError
    nil
  end
end
-
-
# Applies Tabulator header filters to the failures scope. Text fields use
# case-insensitive LIKE with bound parameters; enum-ish fields match
# exactly; unknown fields are ignored. Returns the (possibly) narrowed scope.
def apply_tabulator_filters(scope)
  extract_tabulator_filters.each do |f|
    field = f[:field]
    value = f[:value]
    next if value.blank?

    case field
    when "job_class"
      term = "%#{value.downcase}%"
      scope = scope.where("LOWER(job_class) LIKE ?", term)
    when "queue_name"
      term = "%#{value.downcase}%"
      scope = scope.where("LOWER(COALESCE(queue_name,'')) LIKE ?", term)
    when "error_message"
      term = "%#{value.downcase}%"
      scope = scope.where("LOWER(COALESCE(error_message,'')) LIKE ?", term)
    when "failure_kind"
      scope = scope.where(failure_kind: value.to_s)
    when "retryable"
      # Accepts "true"/"false"/"1"/"0" etc. from the filter widget.
      parsed = ActiveModel::Type::Boolean.new.cast(value)
      scope = scope.where(retryable: parsed)
    end
  end
  scope
end
-
-
# Parses Tabulator's filter payload (params[:filters] or params[:filter])
# into an array of { field:, value: } hashes. Accepts a JSON string, an
# Array, or ActionController::Parameters; anything unparseable yields [].
def extract_tabulator_filters
  raw = params[:filters].presence || params[:filter]
  return [] if raw.blank?

  entries =
    if raw.is_a?(String)
      JSON.parse(raw)
    elsif raw.is_a?(Array)
      raw
    elsif raw.is_a?(ActionController::Parameters)
      raw.to_unsafe_h.values
    else
      []
    end

  Array(entries).filter_map do |entry|
    attrs = entry.respond_to?(:to_h) ? entry.to_h : {}
    name = attrs["field"].to_s
    { field: name, value: attrs["value"] } if name.present?
  end
rescue StandardError
  []
end
-
-
# Applies the first Tabulator sorter to the failures scope. Both the field
# (via the case whitelist) and the direction (forced to ASC/DESC) are
# constrained before being interpolated into Arel.sql, so no user input
# reaches the SQL directly. Returns nil for unknown fields so the caller
# keeps its default ordering.
def apply_remote_sort(scope)
  sorters = extract_tabulator_sorters
  return nil unless sorters.is_a?(Array)

  first = sorters.first
  return nil unless first.respond_to?(:[])

  field = first["field"].to_s
  dir = first["dir"].to_s.downcase == "desc" ? "DESC" : "ASC"

  case field
  when "occurred_at"
    scope.order(Arel.sql("occurred_at #{dir}, id #{dir}"))
  when "job_class"
    scope.order(Arel.sql("job_class #{dir}, occurred_at DESC, id DESC"))
  when "queue_name"
    scope.order(Arel.sql("queue_name #{dir} NULLS LAST, occurred_at DESC, id DESC"))
  when "error_class"
    scope.order(Arel.sql("error_class #{dir}, occurred_at DESC, id DESC"))
  when "failure_kind"
    scope.order(Arel.sql("failure_kind #{dir}, occurred_at DESC, id DESC"))
  else
    nil
  end
end
-
-
# Builds the JSON body Tabulator expects for remote pagination:
# { data: [...], last_page:, last_row: }.
def tabulator_payload(failures:, total:, pages:)
  data = failures.map do |f|
    scope = failure_scope(f)
    {
      id: f.id,
      occurred_at: f.occurred_at&.iso8601,
      job_scope: scope,
      context_label: failure_context_label(f, scope: scope),
      instagram_account_id: f.instagram_account_id,
      instagram_profile_id: f.instagram_profile_id,
      job_class: f.job_class,
      queue_name: f.queue_name,
      failure_kind: f.failure_kind,
      retryable: f.retryable_now?,
      error_class: f.error_class,
      error_message: f.error_message,
      # url_helpers used directly since this hash is built outside a view.
      open_url: Rails.application.routes.url_helpers.admin_background_job_failure_path(f),
      retry_url: Rails.application.routes.url_helpers.admin_retry_background_job_failure_path(f)
    }
  end

  {
    data: data,
    last_page: pages,
    last_row: total
  }
end
-
-
# Coarse scope bucket for a failure, most specific first:
# "profile" > "account" > "system".
def failure_scope(failure)
  if failure.instagram_profile_id.present?
    "profile"
  elsif failure.instagram_account_id.present?
    "account"
  else
    "system"
  end
end
-
-
# Human-readable context label for a failure row based on its resolved
# scope ("profile", "account", anything else => "System").
def failure_context_label(failure, scope:)
  if scope == "profile"
    "Profile ##{failure.instagram_profile_id} (Account ##{failure.instagram_account_id || '?'})"
  elsif scope == "account"
    "Account ##{failure.instagram_account_id}"
  else
    "System"
  end
end
-
-
# Parses Tabulator's sorter payload (params[:sorters] or params[:sort])
# into an Array of sorter hashes; malformed input (bad JSON, wrong shape)
# yields [].
def extract_tabulator_sorters
  raw = params[:sorters].presence || params[:sort]
  return [] if raw.blank?

  if raw.is_a?(String)
    parsed = JSON.parse(raw)
    parsed.is_a?(Array) ? parsed : []
  elsif raw.is_a?(Array)
    raw
  elsif raw.is_a?(ActionController::Parameters)
    raw.to_unsafe_h.values
  else
    []
  end
rescue StandardError
  []
end
-
-
# Destructive: empties every Sidekiq queue and the scheduled/retry/dead
# sets, then quiets all live workers so they stop fetching new jobs.
# Note: quiet! does not kill in-flight jobs; they finish, then the worker idles.
def clear_sidekiq_jobs!
  require "sidekiq/api"

  # Clear all queues
  Sidekiq::Queue.all.each(&:clear)

  # Clear scheduled jobs
  Sidekiq::ScheduledSet.new.clear

  # Clear retry jobs
  Sidekiq::RetrySet.new.clear

  # Clear dead jobs
  Sidekiq::DeadSet.new.clear

  # Stop all processes by sending quiet signal
  Sidekiq::ProcessSet.new.each do |process|
    process.quiet! if process.alive?
  end
end
-
-
# Destructive: wipes all Solid Queue state. Execution rows are deleted
# before the jobs themselves (presumably to satisfy foreign keys —
# NOTE(review): confirm FK direction), and process rows last so workers
# re-register on their next heartbeat.
def clear_solid_queue_jobs!
  # Clear all job executions
  SolidQueue::ReadyExecution.delete_all
  SolidQueue::ScheduledExecution.delete_all
  SolidQueue::ClaimedExecution.delete_all
  SolidQueue::BlockedExecution.delete_all
  SolidQueue::FailedExecution.delete_all
  SolidQueue::Job.delete_all

  # Stop all processes
  SolidQueue::Process.delete_all
end
-
end
-
# Shared base for all /admin controllers: enforces HTTP Basic auth.
class Admin::BaseController < ApplicationController
  before_action :require_admin!

  private

  # Credentials come from Rails credentials (admin.user / admin.password)
  # or the ADMIN_USER / ADMIN_PASSWORD env vars.
  #
  # - Both blank: auth is disabled (admin pages open) to ease first setup.
  # - Exactly one set: fail closed with 503 and an explanatory message.
  # - Both set: HTTP Basic with constant-time comparison.
  def require_admin!
    user = Rails.application.credentials.dig(:admin, :user).presence || ENV["ADMIN_USER"].to_s
    pass = Rails.application.credentials.dig(:admin, :password).presence || ENV["ADMIN_PASSWORD"].to_s

    # If no creds are configured, leave admin pages open for easier setup.
    # You can enable auth later by setting both credentials/admin env vars.
    return if user.blank? && pass.blank?
    if user.blank? || pass.blank?
      render plain: "Admin credentials are partially configured. Set both user and password, or clear both to disable auth.", status: :service_unavailable
      return
    end

    authenticate_or_request_with_http_basic("Admin") do |u, p|
      # Single `&` (not `&&`) looks deliberate: both comparisons always
      # run, avoiding a short-circuit timing signal on the username check.
      ActiveSupport::SecurityUtils.secure_compare(u.to_s, user.to_s) &
        ActiveSupport::SecurityUtils.secure_compare(p.to_s, pass.to_s)
    end
  end
end
-
class Admin::IssuesController < Admin::BaseController
-
# HTML/JSON index of AppIssue rows for a Tabulator grid: header filters,
# free-text search (?q=), remote sorting, and pagination.
def index
  scope = AppIssue.includes(:background_job_failure).recent_first
  scope = apply_tabulator_filters(scope)

  q = params[:q].to_s.strip
  if q.present?
    # Case-insensitive LIKE across title, details, type and source.
    term = "%#{q.downcase}%"
    scope = scope.where(
      "LOWER(title) LIKE ? OR LOWER(COALESCE(details, '')) LIKE ? OR LOWER(issue_type) LIKE ? OR LOWER(source) LIKE ?",
      term, term, term, term
    )
  end

  # Unknown sorters return nil; keep the default recency ordering then.
  scope = apply_remote_sort(scope) || scope

  page = params.fetch(:page, 1).to_i
  page = 1 if page < 1
  per_page = (params[:per_page].presence || params[:size].presence || 50).to_i.clamp(10, 200)

  total = scope.count
  pages = (total / per_page.to_f).ceil
  @issues = scope.offset((page - 1) * per_page).limit(per_page)

  respond_to do |format|
    format.html
    # NOTE(review): tabulator_payload(issues:) is not visible in this chunk —
    # assumed to be defined further down this controller.
    format.json { render json: tabulator_payload(issues: @issues, total: total, pages: pages) }
  end
end
-
-
# Transitions an issue to open/pending/resolved, recording optional
# resolution notes. Unknown statuses raise ArgumentError, which (like any
# StandardError here) is reported back to the user instead of 500ing.
def update
  issue = AppIssue.find(params[:id])
  status = params[:status].to_s
  notes = params[:resolution_notes].to_s

  case status
  when "open" then issue.mark_open!(notes: notes)
  when "pending" then issue.mark_pending!(notes: notes)
  when "resolved" then issue.mark_resolved!(notes: notes)
  else raise ArgumentError, "Unsupported status: #{status}"
  end

  respond_to do |format|
    format.html { redirect_to admin_issues_path, notice: "Issue ##{issue.id} updated." }
    format.json { render json: { ok: true, id: issue.id, status: issue.status } }
  end
rescue StandardError => e
  respond_to do |format|
    format.html { redirect_to admin_issues_path, alert: "Unable to update issue: #{e.message}" }
    format.json { render json: { ok: false, error: e.message }, status: :unprocessable_entity }
  end
end
-
-
# Re-enqueues the failed background job linked to an issue and marks the
# issue pending. RetryError (including the "no linked failure" case raised
# below) is reported to the user; any other error propagates.
def retry_job
  issue = AppIssue.find(params[:id])
  failure = issue.background_job_failure
  raise Jobs::FailureRetry::RetryError, "Issue is not linked to a failed background job" unless failure

  Jobs::FailureRetry.enqueue!(failure)
  issue.mark_pending!(notes: "Retry queued at #{Time.current.iso8601}.")

  respond_to do |format|
    format.html { redirect_to admin_issues_path, notice: "Retry queued for issue ##{issue.id}." }
    format.json { render json: { ok: true } }
  end
rescue Jobs::FailureRetry::RetryError => e
  respond_to do |format|
    format.html { redirect_to admin_issues_path, alert: e.message }
    format.json { render json: { ok: false, error: e.message }, status: :unprocessable_entity }
  end
end
-
-
private
-
-
# Applies Tabulator header filters to the issue scope. Exact-match columns
# (status, severity) filter by equality; free-text columns use a
# case-insensitive "contains" match.
#
# Fix: the user-typed value is now escaped with sanitize_sql_like, so
# literal "%" / "_" characters in the filter box no longer behave as SQL
# wildcards inside the LIKE pattern.
def apply_tabulator_filters(scope)
  extract_tabulator_filters.each do |f|
    field = f[:field]
    value = f[:value]
    next if value.blank?

    case field
    when "status"
      scope = scope.where(status: value.to_s)
    when "severity"
      scope = scope.where(severity: value.to_s)
    when "issue_type"
      term = "%#{ActiveRecord::Base.sanitize_sql_like(value.to_s.downcase)}%"
      scope = scope.where("LOWER(issue_type) LIKE ?", term)
    when "source"
      term = "%#{ActiveRecord::Base.sanitize_sql_like(value.to_s.downcase)}%"
      scope = scope.where("LOWER(source) LIKE ?", term)
    end
  end
  scope
end
-
-
# Normalizes the Tabulator filter params into [{ field:, value: }, ...].
# Accepts a JSON-encoded string, an Array, or nested
# ActionController::Parameters; anything malformed degrades to no filters.
def extract_tabulator_filters
  raw = params[:filters].presence || params[:filter]
  return [] if raw.blank?

  entries =
    if raw.is_a?(String)
      JSON.parse(raw)
    elsif raw.is_a?(Array)
      raw
    elsif raw.is_a?(ActionController::Parameters)
      raw.to_unsafe_h.values
    else
      []
    end

  Array(entries).filter_map do |entry|
    attrs = entry.respond_to?(:to_h) ? entry.to_h : {}
    name = attrs["field"].to_s
    { field: name, value: attrs["value"] } if name.present?
  end
rescue StandardError
  []
end
-
-
# Translates the first Tabulator sorter into an ORDER BY clause for the
# allow-listed columns; returns nil for anything else so the caller keeps
# its default ordering. The direction is normalized to a literal
# ASC/DESC before interpolation, so the Arel.sql fragment never carries
# raw user input.
def apply_remote_sort(scope)
  sorters = extract_tabulator_sorters
  return nil unless sorters.is_a?(Array)

  primary = sorters.first
  return nil unless primary.respond_to?(:[])

  column = primary["field"].to_s
  direction = primary["dir"].to_s.downcase == "desc" ? "DESC" : "ASC"

  order_sql =
    case column
    when "last_seen_at" then "last_seen_at #{direction}, id #{direction}"
    when "severity" then "severity #{direction}, last_seen_at DESC, id DESC"
    when "status" then "status #{direction}, last_seen_at DESC, id DESC"
    when "occurrences" then "occurrences #{direction}, last_seen_at DESC, id DESC"
    end

  order_sql && scope.order(Arel.sql(order_sql))
end
-
-
# Normalizes the Tabulator sorter params into an Array of sorter hashes.
# Accepts a JSON-encoded string, an Array, or nested
# ActionController::Parameters; malformed input yields an empty list.
def extract_tabulator_sorters
  raw = params[:sorters].presence || params[:sort]
  return [] if raw.blank?

  if raw.is_a?(String)
    decoded = JSON.parse(raw)
    decoded.is_a?(Array) ? decoded : []
  elsif raw.is_a?(Array)
    raw
  elsif raw.is_a?(ActionController::Parameters)
    raw.to_unsafe_h.values
  else
    []
  end
rescue StandardError
  []
end
-
-
# Serializes a page of issues into Tabulator's remote-pagination envelope:
# { data:, last_page:, last_row: }. Timestamps are ISO8601 strings; the
# failure_url is nil when the issue has no linked BackgroundJobFailure.
def tabulator_payload(issues:, total:, pages:)
  routes = Rails.application.routes.url_helpers

  rows = issues.map do |issue|
    failure = issue.background_job_failure
    {
      id: issue.id,
      title: issue.title,
      issue_type: issue.issue_type,
      source: issue.source,
      severity: issue.severity,
      status: issue.status,
      details: issue.details.to_s,
      occurrences: issue.occurrences.to_i,
      first_seen_at: issue.first_seen_at&.iso8601,
      last_seen_at: issue.last_seen_at&.iso8601,
      instagram_account_id: issue.instagram_account_id,
      instagram_profile_id: issue.instagram_profile_id,
      retryable: issue.retryable?,
      failure_url: failure && routes.admin_background_job_failure_path(failure),
      update_url: routes.admin_issue_path(issue),
      retry_url: routes.retry_job_admin_issue_path(issue)
    }
  end

  { data: rows, last_page: pages, last_row: total }
end
-
end
-
# Admin listing of ActiveStorage ingestion audit rows, backed by a
# Tabulator table: HTML renders the page shell, JSON serves remote
# filter/sort/pagination requests.
class Admin::StorageIngestionsController < Admin::BaseController
  def index
    scope = ActiveStorageIngestion.includes(:blob).recent_first
    scope = apply_tabulator_filters(scope)
    # A recognized remote sorter overrides the default recent_first order.
    scope = apply_remote_sort(scope) || scope

    page = params.fetch(:page, 1).to_i
    page = 1 if page < 1
    # Tabulator sends the page size as either per_page or size.
    per_page = (params[:per_page].presence || params[:size].presence || 50).to_i.clamp(10, 200)

    total = scope.count
    pages = (total / per_page.to_f).ceil
    @ingestions = scope.offset((page - 1) * per_page).limit(per_page)

    respond_to do |format|
      format.html
      format.json { render json: tabulator_payload(ingestions: @ingestions, total: total, pages: pages) }
    end
  end

  private

  # Applies the free-text "contains" filters from Tabulator.
  #
  # Fix: values are escaped with sanitize_sql_like so literal "%" / "_"
  # typed by the admin no longer behave as SQL wildcards inside the LIKE
  # pattern.
  def apply_tabulator_filters(scope)
    extract_tabulator_filters.each do |f|
      field = f[:field]
      value = f[:value]
      next if value.blank?

      case field
      when "attachment_name"
        term = "%#{ActiveRecord::Base.sanitize_sql_like(value.to_s.downcase)}%"
        scope = scope.where("LOWER(attachment_name) LIKE ?", term)
      when "record_type"
        term = "%#{ActiveRecord::Base.sanitize_sql_like(value.to_s.downcase)}%"
        scope = scope.where("LOWER(COALESCE(record_type, '')) LIKE ?", term)
      when "created_by_job_class"
        term = "%#{ActiveRecord::Base.sanitize_sql_like(value.to_s.downcase)}%"
        scope = scope.where("LOWER(COALESCE(created_by_job_class, '')) LIKE ?", term)
      end
    end
    scope
  end

  # Normalizes the Tabulator filter params (JSON string, Array, or nested
  # ActionController::Parameters) into [{ field:, value: }, ...];
  # malformed input degrades to no filters.
  def extract_tabulator_filters
    raw = params[:filters].presence || params[:filter]
    return [] unless raw.present?

    entries =
      case raw
      when String
        JSON.parse(raw)
      when Array
        raw
      when ActionController::Parameters
        raw.to_unsafe_h.values
      else
        []
      end

    Array(entries).filter_map do |item|
      h = item.respond_to?(:to_h) ? item.to_h : {}
      field = h["field"].to_s
      next if field.blank?
      { field: field, value: h["value"] }
    end
  rescue StandardError
    []
  end

  # Maps the first Tabulator sorter onto an allow-listed ORDER BY clause;
  # returns nil for unknown fields so the caller keeps the default order.
  # The direction is normalized to ASC/DESC before interpolation, so the
  # Arel.sql fragment never contains raw user input.
  def apply_remote_sort(scope)
    sorters = extract_tabulator_sorters
    return nil unless sorters.is_a?(Array)

    first = sorters.first
    return nil unless first.respond_to?(:[])

    field = first["field"].to_s
    dir = first["dir"].to_s.downcase == "desc" ? "DESC" : "ASC"

    case field
    when "created_at"
      scope.order(Arel.sql("created_at #{dir}, id #{dir}"))
    when "blob_byte_size"
      scope.order(Arel.sql("blob_byte_size #{dir}, created_at DESC, id DESC"))
    when "record_type"
      # NULLS LAST is PostgreSQL-specific — confirm if another database is used.
      scope.order(Arel.sql("record_type #{dir} NULLS LAST, created_at DESC, id DESC"))
    else
      nil
    end
  end

  # Same input normalization as extract_tabulator_filters, for sorters.
  def extract_tabulator_sorters
    raw = params[:sorters].presence || params[:sort]
    return [] unless raw.present?

    case raw
    when String
      parsed = JSON.parse(raw)
      parsed.is_a?(Array) ? parsed : []
    when Array
      raw
    when ActionController::Parameters
      raw.to_unsafe_h.values
    else
      []
    end
  rescue StandardError
    []
  end

  # Serializes a page of ingestions into Tabulator's remote-pagination
  # envelope ({ data:, last_page:, last_row: }).
  def tabulator_payload(ingestions:, total:, pages:)
    data = ingestions.map do |row|
      {
        id: row.id,
        created_at: row.created_at&.iso8601,
        attachment_name: row.attachment_name,
        record_type: row.record_type,
        record_id: row.record_id,
        blob_filename: row.blob_filename,
        blob_content_type: row.blob_content_type,
        blob_byte_size: row.blob_byte_size,
        created_by_job_class: row.created_by_job_class,
        created_by_active_job_id: row.created_by_active_job_id,
        queue_name: row.queue_name,
        instagram_account_id: row.instagram_account_id,
        instagram_profile_id: row.instagram_profile_id,
        blob_url: Rails.application.routes.url_helpers.rails_blob_path(row.blob, disposition: "attachment", only_path: true),
        record_url: record_url_for(row)
      }
    end

    { data: data, last_page: pages, last_row: total }
  end

  # Best-effort link to the owning record's show page; unknown record
  # types and routing errors yield nil.
  def record_url_for(row)
    case row.record_type
    when "InstagramAccount"
      Rails.application.routes.url_helpers.instagram_account_path(row.record_id)
    when "InstagramProfile"
      Rails.application.routes.url_helpers.instagram_profile_path(row.record_id)
    when "InstagramPost"
      Rails.application.routes.url_helpers.instagram_post_path(row.record_id)
    else
      nil
    end
  rescue StandardError
    nil
  end
end
-
# Manual smoke-test dashboard for the local AI microservice stack
# (vision / face / OCR / whisper / video). Each test_* method exercises one
# HTTP endpoint on AI_SERVICE_URL and returns a hash of the form
# { success:, result:/error:, message: } for the view / JSON response.
#
# Refactor: the five test methods previously each hand-rolled the same
# multipart/form-data request body; that duplication is now factored into
# post_multipart / multipart_body / probe_endpoint / handle_json_response.
# The wire format (boundary style, part ordering, CRLF placement) and all
# result hashes and messages are unchanged.
class AiDashboardController < ApplicationController
  before_action :require_current_account!
  skip_forgery_protection only: [:test_service, :test_all_services]

  require 'net/http'
  require 'uri'
  require 'json'
  require 'base64'
  require 'securerandom'

  AI_SERVICE_URL = "http://localhost:8000"

  def index
    @service_status = check_ai_services(force: refresh_requested?)
    @test_results = {}
  end

  # Runs a single named test against one service and renders the result.
  def test_service
    service_name = params[:service_name]
    test_type = params[:test_type]

    @test_results =
      case service_name
      when 'vision' then test_vision_service(test_type)
      when 'face' then test_face_service(test_type)
      when 'ocr' then test_ocr_service(test_type)
      when 'whisper' then test_whisper_service(test_type)
      when 'video' then test_video_service(test_type)
      else { error: "Unknown service: #{service_name}" }
      end

    respond_to do |format|
      format.json { render json: @test_results }
      format.html {
        flash[:notice] = "Test completed for #{service_name}"
        redirect_to ai_dashboard_path
      }
    end
  end

  # Runs the default smoke test of every service in one pass.
  def test_all_services
    @test_results = {
      vision: test_vision_service('labels'),
      face: test_face_service('detection'),
      ocr: test_ocr_service('text_extraction'),
      whisper: test_whisper_service('transcription'),
      video: test_video_service('analysis')
    }

    respond_to do |format|
      format.json { render json: @test_results }
      format.html {
        flash[:notice] = "All services tested"
        redirect_to ai_dashboard_path
      }
    end
  end

  private

  # Builds the dashboard's service-status hash from Ops::LocalAiHealth.
  # force: true bypasses the cached snapshot; otherwise a background
  # refresh is scheduled when the cached data is stale or unhealthy.
  def check_ai_services(force: false)
    health = if force
      Ops::LocalAiHealth.check(force: true)
    else
      Ops::LocalAiHealth.status
    end

    enqueue_health_refresh_if_needed(health: health) unless force

    checked_at = parse_health_checked_at(health[:checked_at])
    stale = ActiveModel::Type::Boolean.new.cast(health[:stale])

    if ActiveModel::Type::Boolean.new.cast(health[:ok])
      service_map = health.dig(:details, :microservice, :services) || {}
      # Ollama health is reported separately; surface it alongside the
      # microservice services as a boolean "any models loaded" flag.
      service_map = service_map.merge(
        "ollama" => Array(health.dig(:details, :ollama, :models)).any?
      )

      {
        status: "online",
        services: service_map,
        stale: stale,
        source: health[:source].to_s,
        last_check: checked_at
      }
    else
      message = health[:error].presence || "Local AI stack unavailable"

      {
        status: "offline",
        message: message,
        stale: stale,
        source: health[:source].to_s,
        last_check: checked_at
      }
    end
  end

  def refresh_requested?
    ActiveModel::Type::Boolean.new.cast(params[:refresh])
  end

  # Schedules a background health check when the cached snapshot is stale
  # or unhealthy, throttled via a short-lived cache key. Enqueue failures
  # are swallowed so the dashboard still renders cached data.
  def enqueue_health_refresh_if_needed(health:)
    stale = ActiveModel::Type::Boolean.new.cast(health[:stale])
    unhealthy = !ActiveModel::Type::Boolean.new.cast(health[:ok])
    return unless stale || unhealthy

    throttle_key = "ops:local_ai_health:refresh_enqueued"
    return if Rails.cache.read(throttle_key)

    job = CheckAiMicroserviceHealthJob.perform_later
    Rails.cache.write(throttle_key, job.job_id, expires_in: 45.seconds)
  rescue StandardError
    nil
  end

  # Parses an ISO8601 timestamp, defaulting to now for blank/invalid input.
  def parse_health_checked_at(value)
    text = value.to_s.strip
    return Time.current if text.blank?

    Time.iso8601(text)
  rescue StandardError
    Time.current
  end

  def test_vision_service(test_type)
    case test_type
    when 'labels'
      response = post_image_analysis(features: 'labels', image: create_test_image)
      handle_json_response(response) do |data|
        labels = data['results']['labels'] || []
        {
          success: true,
          result: labels,
          message: "Label detection working - found #{labels.length} objects"
        }
      end
    else
      { success: false, error: "Unknown test type: #{test_type}" }
    end
  rescue StandardError => e
    { success: false, error: e.message }
  end

  def test_face_service(test_type)
    case test_type
    when 'detection'
      response = post_image_analysis(features: 'faces', image: create_test_image)
      handle_json_response(response) do |data|
        faces = data['results']['faces'] || []
        {
          success: true,
          result: faces,
          message: "Face detection working - found #{faces.length} face(s)"
        }
      end
    when 'embedding'
      response = post_multipart(
        "/face/embedding",
        files: { "file" => { filename: "test.png", data: create_test_image } }
      )
      handle_json_response(response) do |data|
        embedding_size = data['metadata']['embedding_size'] || 0
        {
          success: true,
          result: data['embedding'] ? "Embedding generated (size: #{embedding_size})" : nil,
          message: "Face embedding working - generated #{embedding_size}-dimensional vector"
        }
      end
    when 'comparison'
      # The same synthetic image is compared against itself.
      image = create_test_image
      response = post_multipart(
        "/face/compare",
        files: {
          "file1" => { filename: "test1.png", data: image },
          "file2" => { filename: "test2.png", data: image }
        }
      )
      handle_json_response(response) do |data|
        similarity = data['similarity'] || 0
        {
          success: true,
          result: data,
          message: "Face comparison working - similarity score: #{similarity.round(3)}"
        }
      end
    else
      { success: false, error: "Unknown test type: #{test_type}" }
    end
  rescue StandardError => e
    { success: false, error: e.message }
  end

  def test_ocr_service(test_type)
    case test_type
    when 'text_extraction'
      response = post_image_analysis(features: 'text', image: create_test_image_with_text)
      handle_json_response(response) do |data|
        texts = data['results']['text'] || []
        extracted = texts.map { |t| t['text'] }.join(', ')
        preview = extracted.length > 50 ? extracted[0..47] + '...' : extracted
        {
          success: true,
          result: texts,
          message: "OCR text extraction working - found #{texts.length} text region(s): #{preview}"
        }
      end
    else
      { success: false, error: "Unknown test type: #{test_type}" }
    end
  rescue StandardError => e
    { success: false, error: e.message }
  end

  def test_whisper_service(test_type)
    case test_type
    when 'transcription'
      probe_endpoint("/transcribe/audio", label: "Whisper service responding")
    else
      { success: false, error: "Unknown test type: #{test_type}" }
    end
  rescue StandardError => e
    { success: false, error: e.message }
  end

  def test_video_service(test_type)
    case test_type
    when 'analysis'
      probe_endpoint("/analyze/video", label: "Video service responding")
    else
      { success: false, error: "Unknown test type: #{test_type}" }
    end
  rescue StandardError => e
    { success: false, error: e.message }
  end

  # POSTs an empty request to +path+. The endpoint is expected to reject it
  # with 400/422 (missing file), which still proves the service is up.
  def probe_endpoint(path, label:)
    uri = URI("#{AI_SERVICE_URL}#{path}")
    response = Net::HTTP.start(uri.hostname, uri.port) do |http|
      http.request(Net::HTTP::Post.new(uri))
    end

    if response.code == '422' || response.code == '400'
      {
        success: true,
        result: "Endpoint accessible",
        message: label
      }
    else
      { success: false, error: "Unexpected response: #{response.code}" }
    end
  end

  # Shorthand for the common /analyze/image call: one "features" field
  # plus one "file" part named test.png.
  def post_image_analysis(features:, image:)
    post_multipart(
      "/analyze/image",
      fields: { "features" => features },
      files: { "file" => { filename: "test.png", data: image } }
    )
  end

  # Parses a 200 response body as JSON and yields it to the block; any
  # other status is mapped to the shared failure hash.
  def handle_json_response(response)
    return { success: false, error: "HTTP #{response.code}: #{response.body}" } unless response.code == '200'

    yield JSON.parse(response.body)
  end

  # Sends a multipart/form-data POST to the AI service.
  # fields: ordered { name => value } text fields; files: ordered
  # { name => { filename:, data: } } binary parts (always image/png here).
  def post_multipart(path, files:, fields: {})
    uri = URI("#{AI_SERVICE_URL}#{path}")
    boundary = "----WebKitFormBoundary#{SecureRandom.hex(16)}"

    request = Net::HTTP::Post.new(uri)
    request.body = multipart_body(boundary: boundary, fields: fields, files: files)
    request['Content-Type'] = "multipart/form-data; boundary=#{boundary}"

    Net::HTTP.start(uri.hostname, uri.port) do |http|
      http.request(request)
    end
  end

  # Builds the multipart body exactly as the previous inline code did:
  # text fields first, then file parts, each file's binary data followed by
  # a CRLF before the next boundary, closing with "--boundary--".
  # NOTE(review): chunks are joined as-is (matches previous behavior);
  # mixing UTF-8 headers with binary PNG data may need an explicit
  # force_encoding(Encoding::BINARY) — confirm against the running service.
  def multipart_body(boundary:, fields:, files:)
    chunks = []
    fields.each do |name, value|
      chunks << "--#{boundary}\r\n"
      chunks << "Content-Disposition: form-data; name=\"#{name}\"\r\n\r\n"
      chunks << "#{value}\r\n"
    end
    files.each_with_index do |(name, file), index|
      chunks << "\r\n" if index.positive?
      chunks << "--#{boundary}\r\n"
      chunks << "Content-Disposition: form-data; name=\"#{name}\"; filename=\"#{file[:filename]}\"\r\n"
      chunks << "Content-Type: image/png\r\n\r\n"
      chunks << file[:data]
    end
    chunks << "\r\n--#{boundary}--\r\n"
    chunks.join
  end

  def create_test_image
    # Base64-encoded 1x1 transparent PNG used as a minimal valid payload.
    png_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="
    Base64.decode64(png_data)
  end

  def create_test_image_with_text
    # TODO: generate an image containing real text for a meaningful OCR
    # test; for now the plain 1x1 test image is reused.
    create_test_image
  end
end
-
class ApplicationController < ActionController::Base
  private

  # Resolves the Instagram account the UI is operating on, in order of
  # preference: the account selected in the session, then the first account
  # by id, then (legacy single-account installs) one bootstrapped from the
  # configured username. Memoized per request — a nil result is memoized too,
  # which is why the defined? guard is used instead of ||=.
  def current_account
    return @current_account if defined?(@current_account)

    chosen_id = session[:instagram_account_id]
    @current_account = InstagramAccount.find_by(id: chosen_id) if chosen_id.present?

    # Fall back to the first account when nothing (valid) is selected.
    @current_account ||= InstagramAccount.order(:id).first

    # Optional bootstrap for older single-account setups.
    if @current_account.nil?
      configured_username = Rails.application.config.x.instagram.username.to_s.strip
      @current_account = InstagramAccount.create!(username: configured_username) if configured_username.present?
    end

    @current_account
  end

  helper_method :current_account

  # Redirects to the account list when no account can be resolved.
  def require_current_account!
    redirect_to instagram_accounts_path, alert: "Add an Instagram account first." if current_account.blank?
  end
end
-
# Controller mixin for resolving a preview image URL for a profile post,
# falling back through: attached preview image -> metadata-provided URLs ->
# an on-the-fly ActiveStorage video preview (which also queues background
# generation of a persistent preview image).
module ProfilePostPreviewSupport
  extend ActiveSupport::Concern

  # Throttle window between enqueues of preview generation for one post.
  # NOTE(review): the name says SECONDS but the value is an
  # ActiveSupport::Duration (30.minutes); it is only passed to expires_in,
  # which accepts a Duration, so this works — consider renaming.
  PROFILE_POST_PREVIEW_ENQUEUE_TTL_SECONDS = 30.minutes

  included do
    helper_method :preferred_profile_post_preview_image_url
  end

  private

  # Returns the best available preview image URL for +post+, or nil.
  #
  # Priority:
  #   1. the post's attached preview_image blob (local blob path),
  #   2. the first present URL among the known metadata keys below,
  #   3. a live video preview representation (may be nil).
  # +metadata+ is expected to be a Hash; any other value is treated as empty.
  def preferred_profile_post_preview_image_url(post:, metadata:)
    if post.preview_image.attached?
      return Rails.application.routes.url_helpers.rails_blob_path(post.preview_image, only_path: true)
    end

    data = metadata.is_a?(Hash) ? metadata : {}
    direct_url = [
      data["preview_image_url"],
      data["poster_url"],
      data["image_url"],
      data["media_url_image"]
    ].find(&:present?)
    return direct_url.to_s if direct_url.present?

    local_profile_post_preview_representation_url(post: post)
  end

  # Builds a URL for an ActiveStorage preview of the post's video media,
  # enqueueing persistent preview generation as a side effect. Returns nil
  # for missing/non-video media or when preview/URL building raises —
  # errors are deliberately swallowed because this is a best-effort fallback.
  def local_profile_post_preview_representation_url(post:)
    return nil unless post.media.attached?
    return nil unless post.media.blob&.content_type.to_s.start_with?("video/")

    enqueue_profile_post_preview_generation(post: post)
    view_context.url_for(post.media.preview(resize_to_limit: [ 640, 640 ]))
  rescue StandardError
    nil
  end

  # Enqueues GenerateProfilePostPreviewImageJob for the post at most once
  # per TTL window — the cache entry created by fetch doubles as the
  # throttle. Failures are logged and swallowed so preview rendering never
  # breaks the page.
  def enqueue_profile_post_preview_generation(post:)
    return if post.preview_image.attached?

    cache_key = "profile_post:preview_enqueue:#{post.id}"
    Rails.cache.fetch(cache_key, expires_in: PROFILE_POST_PREVIEW_ENQUEUE_TTL_SECONDS) do
      GenerateProfilePostPreviewImageJob.perform_later(instagram_profile_post_id: post.id)
      true
    end
  rescue StandardError => e
    Rails.logger.warn("[profile_post_preview] preview enqueue failed post_id=#{post.id}: #{e.class}: #{e.message}")
    nil
  end
end
-
class FeedCapturesController < ApplicationController
  before_action :require_current_account!

  # Queues a bounded home-feed capture job for the current account.
  # Round count, inter-round delay and new-post cap are clamped to safe
  # ranges before enqueue.
  def create
    capture_options = {
      rounds: params.fetch(:rounds, 4).to_i.clamp(1, 25),
      delay_seconds: params.fetch(:delay_seconds, 45).to_i.clamp(10, 120),
      max_new: params.fetch(:max_new, 20).to_i.clamp(1, 200)
    }

    CaptureHomeFeedJob.perform_later(instagram_account_id: current_account.id, **capture_options)

    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_account_path(current_account), notice: "Feed capture queued." }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "notice", message: "Feed capture queued." }
        )
      end
      format.json { head :accepted }
    end
  rescue StandardError => e
    failure_message = "Unable to queue feed capture: #{e.message}"
    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_account_path(current_account), alert: failure_message }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: failure_message }
        )
      end
      format.json { render json: { error: e.message }, status: :unprocessable_entity }
    end
  end
end
-
class FollowGraphSyncsController < ApplicationController
  before_action :require_current_account!

  # Records a queued follow-graph SyncRun for the current account and
  # enqueues the job that performs it; the turbo response also refreshes
  # the sync_status frame with the new run.
  def create
    sync_run = current_account.sync_runs.create!(kind: "follow_graph", status: "queued")
    SyncFollowGraphJob.perform_later(instagram_account_id: current_account.id, sync_run_id: sync_run.id)

    queued_message = "Follow graph sync queued. You will be notified when it completes."
    respond_to do |format|
      format.html { redirect_to instagram_profiles_path, notice: queued_message }
      format.turbo_stream do
        render turbo_stream: [
          turbo_stream.append(
            "notifications",
            partial: "shared/notification",
            locals: { kind: "notice", message: queued_message }
          ),
          turbo_stream.replace(
            "sync_status",
            partial: "sync_runs/status",
            locals: { sync_run: sync_run }
          )
        ]
      end
      format.json { head :accepted }
    end
  rescue StandardError => e
    failure_message = "Unable to queue follow graph sync: #{e.message}"
    respond_to do |format|
      format.html { redirect_to instagram_profiles_path, alert: failure_message }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: failure_message }
        )
      end
      format.json { render json: { error: e.message }, status: :unprocessable_entity }
    end
  end
end
-
class InstagramAccountsController < ApplicationController
-
STORY_SYNC_LIMIT = SyncHomeStoryCarouselJob::STORY_BATCH_LIMIT
-
CONTINUOUS_STORY_SYNC_CYCLE_LIMIT = SyncAllHomeStoriesJob::MAX_CYCLES
-
STORY_ARCHIVE_SLOW_REQUEST_MS = Integer(ENV.fetch("STORY_ARCHIVE_SLOW_REQUEST_MS", "2000"))
-
-
before_action :set_account, only: %i[
-
show update destroy select manual_login import_cookies export_cookies validate_session
-
sync_next_profiles sync_profile_stories sync_stories_with_comments
-
sync_all_stories_continuous story_media_archive generate_llm_comment technical_details
-
run_continuous_processing
-
]
-
before_action :normalize_navigation_format, only: %i[show]
-
around_action :log_story_media_archive_request, only: %i[story_media_archive]
-
-
def index
-
@accounts = InstagramAccount.order(:id).to_a
-
@metrics = Ops::Metrics.system
-
end
-
-
def show
-
session[:instagram_account_id] = @account.id if session[:instagram_account_id].blank?
-
-
snapshot = InstagramAccounts::DashboardSnapshotService.new(account: @account).call
-
@issues = snapshot[:issues]
-
@metrics = snapshot[:metrics]
-
@latest_sync_run = snapshot[:latest_sync_run]
-
@recent_failures = snapshot[:recent_failures]
-
@recent_audit_entries = snapshot[:recent_audit_entries]
-
@actions_todo_queue = snapshot[:actions_todo_queue]
-
@skip_diagnostics = snapshot[:skip_diagnostics]
-
end
-
-
def create
-
username = params.dig(:instagram_account, :username).to_s.strip
-
raise "Username cannot be blank" if username.blank?
-
-
account = InstagramAccount.create!(username: username)
-
session[:instagram_account_id] = account.id
-
redirect_to instagram_account_path(account), notice: "Account added."
-
rescue StandardError => e
-
redirect_to instagram_accounts_path, alert: "Unable to add account: #{e.message}"
-
end
-
-
def update
-
if @account.update(account_params)
-
redirect_to instagram_account_path(@account), notice: "Account updated."
-
else
-
redirect_to instagram_account_path(@account), alert: @account.errors.full_messages.to_sentence
-
end
-
end
-
-
def destroy
-
@account.destroy!
-
session[:instagram_account_id] = nil if session[:instagram_account_id].to_i == @account.id
-
redirect_to instagram_accounts_path, notice: "Account removed."
-
rescue StandardError => e
-
redirect_to instagram_account_path(@account), alert: "Unable to remove account: #{e.message}"
-
end
-
-
def select
-
session[:instagram_account_id] = @account.id
-
redirect_to instagram_account_path(@account), notice: "Selected #{@account.username}.", status: :see_other
-
end
-
-
def manual_login
-
Instagram::Client.new(account: @account).manual_login!(timeout_seconds: timeout_seconds)
-
@account.update!(login_state: "authenticated")
-
-
redirect_to instagram_account_path(@account), notice: "Manual login completed and session bundle saved."
-
rescue StandardError => e
-
redirect_to instagram_account_path(@account), alert: "Manual login failed: #{e.message}"
-
end
-
-
def import_cookies
-
payload = params[:cookies_json].to_s
-
parsed = JSON.parse(payload)
-
-
@account.cookies = parsed
-
@account.login_state = "authenticated"
-
@account.save!
-
-
redirect_to instagram_account_path(@account), notice: "Cookies imported successfully."
-
rescue JSON::ParserError
-
redirect_to instagram_account_path(@account), alert: "Invalid JSON format for cookies."
-
rescue StandardError => e
-
redirect_to instagram_account_path(@account), alert: "Cookie import failed: #{e.message}"
-
end
-
-
def export_cookies
-
send_data(
-
JSON.pretty_generate(@account.cookies),
-
filename: "instagram_cookies_#{@account.username}.json",
-
type: "application/json"
-
)
-
end
-
-
def validate_session
-
client = Instagram::Client.new(account: @account)
-
validation_result = client.validate_session!
-
-
respond_to do |format|
-
format.html { redirect_to instagram_account_path(@account), notice: validation_result[:message] }
-
format.turbo_stream do
-
render turbo_stream: turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: validation_result[:valid] ? "notice" : "alert", message: validation_result[:message] }
-
)
-
end
-
format.json { render json: validation_result }
-
end
-
rescue StandardError => e
-
error_message = "Session validation failed: #{e.message}"
-
respond_to do |format|
-
format.html { redirect_to instagram_account_path(@account), alert: error_message }
-
format.turbo_stream do
-
render turbo_stream: turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: "alert", message: error_message }
-
)
-
end
-
format.json { render json: { valid: false, message: error_message }, status: :unprocessable_entity }
-
end
-
end
-
-
def sync_next_profiles
-
limit = params.fetch(:limit, 10).to_i.clamp(1, 50)
-
SyncNextProfilesForAccountJob.perform_later(instagram_account_id: @account.id, limit: limit)
-
respond_to do |format|
-
format.html { redirect_to instagram_account_path(@account), notice: "Queued sync for next #{limit} profiles." }
-
format.turbo_stream do
-
render turbo_stream: turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: "notice", message: "Queued sync for next #{limit} profiles." }
-
)
-
end
-
format.json { head :accepted }
-
end
-
rescue StandardError => e
-
respond_to do |format|
-
format.html { redirect_to instagram_account_path(@account), alert: "Unable to queue next-profile sync: #{e.message}" }
-
format.turbo_stream do
-
render turbo_stream: turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: "alert", message: "Unable to queue next-profile sync: #{e.message}" }
-
)
-
end
-
format.json { render json: { error: e.message }, status: :unprocessable_entity }
-
end
-
end
-
-
def sync_profile_stories
-
story_limit = params.fetch(:story_limit, STORY_SYNC_LIMIT).to_i.clamp(1, STORY_SYNC_LIMIT)
-
SyncHomeStoryCarouselJob.perform_later(
-
instagram_account_id: @account.id,
-
story_limit: story_limit,
-
auto_reply_only: false
-
)
-
respond_to do |format|
-
format.html { redirect_to instagram_account_path(@account), notice: "Queued next #{story_limit} stories." }
-
format.turbo_stream do
-
render turbo_stream: turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: "notice", message: "Queued next #{story_limit} stories." }
-
)
-
end
-
format.json { head :accepted }
-
end
-
rescue StandardError => e
-
respond_to do |format|
-
format.html { redirect_to instagram_account_path(@account), alert: "Unable to queue story sync: #{e.message}" }
-
format.turbo_stream do
-
render turbo_stream: turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: "alert", message: "Unable to queue story sync: #{e.message}" }
-
)
-
end
-
format.json { render json: { error: e.message }, status: :unprocessable_entity }
-
end
-
end
-
-
def sync_stories_with_comments
-
story_limit = params.fetch(:story_limit, STORY_SYNC_LIMIT).to_i.clamp(1, STORY_SYNC_LIMIT)
-
SyncHomeStoryCarouselJob.perform_later(
-
instagram_account_id: @account.id,
-
story_limit: story_limit,
-
auto_reply_only: true
-
)
-
respond_to do |format|
-
format.html { redirect_to instagram_account_path(@account), notice: "Queued next #{story_limit} stories (auto-reply tag required)." }
-
format.turbo_stream do
-
render turbo_stream: turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: "notice", message: "Queued next #{story_limit} stories (auto-reply tag required)." }
-
)
-
end
-
format.json { head :accepted }
-
end
-
rescue StandardError => e
-
respond_to do |format|
-
format.html { redirect_to instagram_account_path(@account), alert: "Unable to queue story sync with comments: #{e.message}" }
-
format.turbo_stream do
-
render turbo_stream: turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: "alert", message: "Unable to queue story sync with comments: #{e.message}" }
-
)
-
end
-
format.json { render json: { error: e.message }, status: :unprocessable_entity }
-
end
-
end
-
-
# Queues an open-ended, cycling story sync (with auto-replies) for the
# current account. Mirrors the other sync actions' three-format response
# and rescue-to-alert pattern.
def sync_all_stories_continuous
  SyncAllHomeStoriesJob.perform_later(
    instagram_account_id: @account.id,
    cycle_story_limit: STORY_SYNC_LIMIT
  )

  notice = "Queued continuous story sync with auto-replies."
  respond_to do |format|
    format.html { redirect_to instagram_account_path(@account), notice: notice }
    format.turbo_stream do
      render turbo_stream: turbo_stream.append(
        "notifications",
        partial: "shared/notification",
        locals: { kind: "notice", message: notice }
      )
    end
    format.json { head :accepted }
  end
rescue StandardError => e
  failure = "Unable to queue continuous story sync: #{e.message}"
  respond_to do |format|
    format.html { redirect_to instagram_account_path(@account), alert: failure }
    format.turbo_stream do
      render turbo_stream: turbo_stream.append(
        "notifications",
        partial: "shared/notification",
        locals: { kind: "alert", message: failure }
      )
    end
    format.json { render json: { error: e.message }, status: :unprocessable_entity }
  end
end
-
-
# Queues the full continuous-processing pipeline for the account.
# trigger_source is taken from params when present, otherwise tagged as
# a manual UI trigger.
def run_continuous_processing
  requested_trigger = params[:trigger_source].to_s.presence
  trigger_source = requested_trigger || "manual_account_trigger"

  ProcessInstagramAccountContinuouslyJob.perform_later(
    instagram_account_id: @account.id,
    trigger_source: trigger_source
  )

  notice = "Queued continuous processing pipeline."
  respond_to do |format|
    format.html { redirect_to instagram_account_path(@account), notice: notice }
    format.turbo_stream do
      render turbo_stream: turbo_stream.append(
        "notifications",
        partial: "shared/notification",
        locals: { kind: "notice", message: notice }
      )
    end
    format.json { render json: { status: "queued" }, status: :accepted }
  end
rescue StandardError => e
  failure = "Unable to queue continuous processing: #{e.message}"
  respond_to do |format|
    format.html { redirect_to instagram_account_path(@account), alert: failure }
    format.turbo_stream do
      render turbo_stream: turbo_stream.append(
        "notifications",
        partial: "shared/notification",
        locals: { kind: "alert", message: failure }
      )
    end
    format.json { render json: { error: e.message }, status: :unprocessable_entity }
  end
end
-
-
# JSON endpoint for the paginated story media archive. Delegates querying
# to StoryArchiveQuery and per-item shaping to StoryArchiveItemSerializer;
# any failure is reported as a 422 with the error message.
def story_media_archive
  archive = InstagramAccounts::StoryArchiveQuery.new(
    account: @account,
    page: params.fetch(:page, 1),
    per_page: params.fetch(:per_page, 12),
    on: params[:on]
  ).call

  serialized_items = archive.events.map do |event|
    InstagramAccounts::StoryArchiveItemSerializer.new(event: event).call
  end

  render json: {
    items: serialized_items,
    page: archive.page,
    per_page: archive.per_page,
    total: archive.total,
    has_more: archive.has_more,
    # `on` is a date filter; nil when the archive is not date-scoped.
    on: archive.on&.iso8601
  }
rescue StandardError => e
  render json: { error: e.message }, status: :unprocessable_entity
end
-
-
# Requests (or polls, when status_only is set) an LLM-generated comment
# for a story event. The service object returns both payload and HTTP
# status, which are rendered as-is.
def generate_llm_comment
  service = InstagramAccounts::LlmCommentRequestService.new(
    account: @account,
    event_id: params.require(:event_id),
    provider: params.fetch(:provider, :local),
    model: params[:model].presence,
    status_only: params[:status_only]
  )

  result = service.call
  render json: result.payload, status: result.status
end
-
-
# Returns the technical-details payload for a single event; the service
# decides both the JSON body and the response status.
def technical_details
  service = InstagramAccounts::TechnicalDetailsPayloadService.new(
    account: @account,
    event_id: params.require(:event_id)
  )

  result = service.call
  render json: result.payload, status: result.status
end
-
-
private
-
-
# before_action: loads the account for member routes; raises
# ActiveRecord::RecordNotFound (404) for unknown ids.
def set_account
  @account = InstagramAccount.find(params[:id])
end
-
-
# Strong parameters: only the username may be mass-assigned.
def account_params
  params.require(:instagram_account).permit(:username)
end
-
-
# Requested timeout in seconds, defaulting to 180 and clamped to 60..900.
# Non-numeric input coerces to 0 via to_i and therefore clamps to 60.
def timeout_seconds
  requested = params.fetch(:timeout_seconds, 180).to_i
  requested.clamp(60, 900)
end
-
-
# Wraps a story-archive request, logging a warning (with connection-pool
# stats) when the request exceeds STORY_ARCHIVE_SLOW_REQUEST_MS.
#
# Fixes two defects in the original:
# * `return` inside `ensure` silently swallowed any exception raised by
#   the yielded block; the guard is now an `if` so in-flight exceptions
#   propagate normally.
# * the inline `rescue {}` modifier is replaced with an explicit
#   begin/rescue so only the pool-stat lookup is best-effort.
def log_story_media_archive_request
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  yield
ensure
  elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000.0).round(1)
  if elapsed_ms >= STORY_ARCHIVE_SLOW_REQUEST_MS
    pool_stats =
      begin
        ActiveRecord::Base.connection_pool.stat
      rescue StandardError
        {} # stats are diagnostic only; never let them fail the request
      end
    Rails.logger.warn(
      "[story_media_archive] slow request " \
      "account_id=#{@account&.id} elapsed_ms=#{elapsed_ms} " \
      "pool_size=#{pool_stats[:size]} pool_busy=#{pool_stats[:busy]} " \
      "pool_waiting=#{pool_stats[:waiting]}"
    )
  end
end
-
-
# Downgrades Turbo Stream navigation requests to plain HTML so actions
# without a turbo_stream response still render.
def normalize_navigation_format
  return unless request.format.turbo_stream?

  request.format = :html
end
-
-
end
-
# Read-only browsing of an account's detected Instagram posts, with a
# Tabulator-compatible JSON API (remote filtering, sorting, pagination).
class InstagramPostsController < ApplicationController
  before_action :require_current_account!

  # Lists posts for the current account. HTML renders the page shell;
  # JSON returns a Tabulator payload (data / last_page / last_row).
  # Supports free-text search (?q=), a status filter, Tabulator remote
  # filters/sorters, and page/per_page (or Tabulator's `size`) pagination.
  def index
    @account = current_account

    scope = @account.instagram_posts
    scope = apply_tabulator_filters(scope)

    @q = params[:q].to_s.strip
    if @q.present?
      # Case-insensitive match on shortcode or author username.
      term = "%#{@q.downcase}%"
      scope = scope.where("LOWER(shortcode) LIKE ? OR LOWER(COALESCE(author_username,'')) LIKE ?", term, term)
    end

    if params[:status].present?
      scope = scope.where(status: params[:status].to_s)
    end

    # Remote sort wins when a recognized sorter is supplied; otherwise
    # newest-detected first.
    scope = apply_remote_sort(scope) || scope.order(detected_at: :desc, id: :desc)

    page = params.fetch(:page, 1).to_i
    page = 1 if page < 1
    # Tabulator sends the page size as `size`; accept `per_page` too.
    per_page_param = params[:per_page].presence || params[:size].presence
    per_page = per_page_param.to_i
    per_page = 50 if per_page <= 0
    per_page = per_page.clamp(10, 200)

    total = scope.count
    pages = (total / per_page.to_f).ceil
    posts = scope.offset((page - 1) * per_page).limit(per_page)

    respond_to do |format|
      format.html
      format.json do
        render json: tabulator_payload(posts: posts, total: total, pages: pages)
      end
    end
  end

  # Shows a single post plus its most recent "post" AI analysis (if any).
  def show
    @account = current_account
    @post = @account.instagram_posts.find(params[:id])
    @latest_analysis = @post.ai_analyses.where(purpose: "post").recent_first.first
  end

  private

  # Applies the whitelisted Tabulator header filters to the scope.
  # Unknown fields and blank values are ignored.
  def apply_tabulator_filters(scope)
    extract_tabulator_filters.each do |f|
      field = f[:field]
      value = f[:value]
      next if value.blank?

      case field
      when "author_username"
        term = "%#{value.downcase}%"
        scope = scope.where("LOWER(COALESCE(author_username,'')) LIKE ?", term)
      when "status"
        scope = scope.where(status: value.to_s)
      when "post_kind"
        scope = scope.where(post_kind: value.to_s)
      end
    end

    scope
  end

  # Normalizes Tabulator's filter param — which may arrive as a JSON
  # string, an array, or nested ActionController::Parameters — into an
  # array of { field:, value: } hashes. Any parse failure yields [].
  def extract_tabulator_filters
    raw = params[:filters].presence || params[:filter]
    return [] unless raw.present?

    entries =
      case raw
      when String
        JSON.parse(raw)
      when Array
        raw
      when ActionController::Parameters
        raw.to_unsafe_h.values
      else
        []
      end

    Array(entries).filter_map do |item|
      h = item.respond_to?(:to_h) ? item.to_h : {}
      field = h["field"].to_s
      next if field.blank?

      { field: field, value: h["value"] }
    end
  rescue StandardError
    []
  end

  # Builds an ORDER BY from the first Tabulator sorter, or returns nil so
  # the caller falls back to the default order. Only whitelisted fields
  # are honored, and `dir` is normalized to ASC/DESC before interpolation
  # into Arel.sql, so no user input reaches the SQL directly.
  # NOTE(review): `NULLS LAST` is PostgreSQL-specific syntax.
  def apply_remote_sort(scope)
    sorters = extract_tabulator_sorters
    return nil unless sorters.is_a?(Array)

    first = sorters.first
    return nil unless first.respond_to?(:[])

    field = first["field"].to_s
    dir = first["dir"].to_s.downcase == "desc" ? "DESC" : "ASC"

    case field
    when "detected_at"
      scope.order(Arel.sql("detected_at #{dir}, id #{dir}"))
    when "author_username"
      scope.order(Arel.sql("author_username #{dir} NULLS LAST, detected_at DESC, id DESC"))
    when "status"
      scope.order(Arel.sql("status #{dir}, detected_at DESC, id DESC"))
    else
      nil
    end
  end

  # Serializes a page of posts into Tabulator's remote-pagination shape:
  # { data:, last_page:, last_row: }.
  def tabulator_payload(posts:, total:, pages:)
    data = posts.map do |p|
      {
        id: p.id,
        shortcode: p.shortcode,
        post_kind: p.post_kind,
        author_username: p.author_username,
        detected_at: p.detected_at&.iso8601,
        status: p.status,
        # `analysis` is free-form; only read keys when it is a Hash.
        relevant: p.analysis.is_a?(Hash) ? p.analysis["relevant"] : nil,
        author_type: p.analysis.is_a?(Hash) ? p.analysis["author_type"] : nil,
        permalink: p.permalink,
        media_attached: p.media.attached?,
        open_url: Rails.application.routes.url_helpers.instagram_post_path(p)
      }
    end

    { data: data, last_page: pages, last_row: total }
  end

  # Normalizes Tabulator's sorter param (JSON string / array / nested
  # Parameters) into an array of sorter hashes; [] on any failure.
  def extract_tabulator_sorters
    raw = params[:sorters].presence || params[:sort]
    return [] unless raw.present?

    case raw
    when String
      parsed = JSON.parse(raw)
      parsed.is_a?(Array) ? parsed : []
    when Array
      raw
    when ActionController::Parameters
      raw.to_unsafe_h.values
    else
      []
    end
  rescue StandardError
    []
  end
end
-
# Queues background jobs for a single profile of the current account
# (analysis, post capture, detail fetch, story sync, etc.). Every action
# follows the same shape: create an action log + enqueue via
# #enqueue_profile_job, respond over HTML/Turbo Stream/JSON, and convert
# any failure into an alert response instead of a 500.
class InstagramProfileActionsController < ApplicationController
  before_action :require_current_account!

  # Account-wide: queues a job that backfills missing profile avatars.
  # (No per-profile action log — this is not routed through
  # #enqueue_profile_job.)
  def download_missing_avatars
    DownloadMissingAvatarsJob.perform_later(instagram_account_id: current_account.id)
    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profiles_path, notice: "Avatar sync queued." }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "notice", message: "Avatar sync queued." }
        )
      end
      format.json { head :accepted }
    end
  rescue StandardError => e
    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profiles_path, alert: "Unable to queue avatar sync: #{e.message}" }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Unable to queue avatar sync: #{e.message}" }
        )
      end
      format.json { render json: { error: e.message }, status: :unprocessable_entity }
    end
  end

  # Queues AI analysis for one profile.
  def analyze
    profile = current_account.instagram_profiles.find(params[:id])
    enqueue_profile_job(
      profile: profile,
      action: "analyze_profile",
      job_class: AnalyzeInstagramProfileJob
    )

    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "AI analysis queued." }
      format.turbo_stream do
        render turbo_stream: queued_action_streams(profile: profile, message: "AI analysis queued for #{profile.username}.")
      end
      format.json { head :accepted }
    end
  rescue StandardError => e
    respond_to do |format|
      # `profile` may be unset if the find failed, so build the path from
      # the raw param here.
      format.html { redirect_back fallback_location: instagram_profile_path(params[:id]), alert: "Unable to queue AI analysis: #{e.message}" }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Unable to queue AI analysis: #{e.message}" }
        )
      end
      format.json { render json: { error: e.message }, status: :unprocessable_entity }
    end
  end

  # Queues capture of the profile's recent posts (with up to 20 comments
  # per post).
  def capture_posts
    profile = current_account.instagram_profiles.find(params[:id])
    enqueue_profile_job(
      profile: profile,
      action: "capture_profile_posts",
      job_class: CaptureInstagramProfilePostsJob,
      extra_job_args: {
        comments_limit: 20
      }
    )

    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "Profile post capture queued." }
      format.turbo_stream do
        render turbo_stream: queued_action_streams(profile: profile, message: "Profile post capture queued for #{profile.username}.")
      end
      format.json { head :accepted }
    end
  rescue StandardError => e
    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(params[:id]), alert: "Unable to queue post capture: #{e.message}" }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Unable to queue post capture: #{e.message}" }
        )
      end
      format.json { render json: { error: e.message }, status: :unprocessable_entity }
    end
  end

  # Queues a refresh of the profile's details (bio, counters, etc.).
  def fetch_details
    profile = current_account.instagram_profiles.find(params[:id])
    enqueue_profile_job(
      profile: profile,
      action: "fetch_profile_details",
      job_class: FetchInstagramProfileDetailsJob
    )

    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "Profile fetch queued." }
      format.turbo_stream do
        render turbo_stream: queued_action_streams(profile: profile, message: "Profile fetch queued for #{profile.username}.")
      end
      format.json { head :accepted }
    end
  rescue StandardError => e
    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(params[:id]), alert: "Unable to queue fetch: #{e.message}" }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Unable to queue fetch: #{e.message}" }
        )
      end
      format.json { render json: { error: e.message }, status: :unprocessable_entity }
    end
  end

  # Queues a rebuild of the profile's history timeline.
  def build_history
    profile = current_account.instagram_profiles.find(params[:id])
    enqueue_profile_job(
      profile: profile,
      action: "build_history",
      job_class: BuildInstagramProfileHistoryJob
    )

    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "History build queued." }
      format.turbo_stream do
        render turbo_stream: queued_action_streams(profile: profile, message: "History build queued for #{profile.username}.")
      end
      format.json { head :accepted }
    end
  rescue StandardError => e
    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(params[:id]), alert: "Unable to queue history build: #{e.message}" }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Unable to queue history build: #{e.message}" }
        )
      end
      format.json { render json: { error: e.message }, status: :unprocessable_entity }
    end
  end

  # Queues a check of whether the profile can receive direct messages.
  def verify_messageability
    profile = current_account.instagram_profiles.find(params[:id])
    enqueue_profile_job(
      profile: profile,
      action: "verify_messageability",
      job_class: VerifyInstagramMessageabilityJob
    )

    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "Messageability check queued." }
      format.turbo_stream do
        render turbo_stream: queued_action_streams(profile: profile, message: "Messageability check queued for #{profile.username}.")
      end
      format.json { head :accepted }
    end
  rescue StandardError => e
    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(params[:id]), alert: "Unable to queue messageability check: #{e.message}" }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Unable to queue messageability check: #{e.message}" }
        )
      end
      format.json { render json: { error: e.message }, status: :unprocessable_entity }
    end
  end

  # Queues a download of this single profile's avatar.
  def download_avatar
    profile = current_account.instagram_profiles.find(params[:id])
    enqueue_profile_job(
      profile: profile,
      action: "sync_avatar",
      job_class: DownloadInstagramProfileAvatarJob
    )

    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "Avatar download queued." }
      format.turbo_stream do
        render turbo_stream: queued_action_streams(profile: profile, message: "Avatar download queued for #{profile.username}.")
      end
      format.json { head :accepted }
    end
  rescue StandardError => e
    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(params[:id]), alert: "Unable to queue avatar download: #{e.message}" }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Unable to queue avatar download: #{e.message}" }
        )
      end
      format.json { render json: { error: e.message }, status: :unprocessable_entity }
    end
  end

  # Queues a story sync for this profile (up to 10 stories, no forced
  # re-analysis, no auto-reply).
  def sync_stories
    profile = current_account.instagram_profiles.find(params[:id])
    enqueue_profile_job(
      profile: profile,
      action: "sync_stories",
      job_class: SyncInstagramProfileStoriesJob,
      extra_job_args: {
        max_stories: 10,
        force_analyze_all: false,
        auto_reply: false
      }
    )

    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "Story sync queued." }
      format.turbo_stream do
        render turbo_stream: queued_action_streams(profile: profile, message: "Story sync queued for #{profile.username}.")
      end
      format.json { head :accepted }
    end
  rescue StandardError => e
    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(params[:id]), alert: "Unable to queue story sync: #{e.message}" }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Unable to queue story sync: #{e.message}" }
        )
      end
      format.json { render json: { error: e.message }, status: :unprocessable_entity }
    end
  end

  # Same as #sync_stories but with force_analyze_all: true, re-analyzing
  # stories that were already processed.
  def sync_stories_force
    profile = current_account.instagram_profiles.find(params[:id])
    enqueue_profile_job(
      profile: profile,
      action: "sync_stories",
      job_class: SyncInstagramProfileStoriesJob,
      extra_job_args: {
        max_stories: 10,
        force_analyze_all: true,
        auto_reply: false
      }
    )

    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "Force story analysis queued." }
      format.turbo_stream do
        render turbo_stream: queued_action_streams(profile: profile, message: "Force story analysis queued for #{profile.username}.")
      end
      format.json { head :accepted }
    end
  rescue StandardError => e
    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(params[:id]), alert: "Unable to queue force story analysis: #{e.message}" }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Unable to queue force story analysis: #{e.message}" }
        )
      end
      format.json { render json: { error: e.message }, status: :unprocessable_entity }
    end
  end

  # Debug variant of the story sync: wipes this profile's previous debug
  # snapshot files first, then queues the sync under a distinct action
  # name so the log entries are distinguishable.
  def sync_stories_debug
    profile = current_account.instagram_profiles.find(params[:id])

    # Clean up existing debug files for this profile
    cleanup_profile_debug_files(profile.username)

    enqueue_profile_job(
      profile: profile,
      action: "sync_stories_debug",
      job_class: SyncInstagramProfileStoriesJob,
      extra_job_args: {
        max_stories: 10,
        force_analyze_all: false,
        auto_reply: false
      }
    )

    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "Debug story sync queued. HTML snapshots will be captured." }
      format.turbo_stream do
        render turbo_stream: queued_action_streams(profile: profile, message: "Debug story sync queued for #{profile.username}. HTML snapshots will be captured.")
      end
      format.json { head :accepted }
    end
  rescue StandardError => e
    respond_to do |format|
      format.html { redirect_back fallback_location: instagram_profile_path(params[:id]), alert: "Unable to queue debug story sync: #{e.message}" }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Unable to queue debug story sync: #{e.message}" }
        )
      end
      format.json { render json: { error: e.message }, status: :unprocessable_entity }
    end
  end

  private

  # Builds the pair of Turbo Streams used by successful queue actions:
  # a notification toast plus a refresh of the action-history section.
  def queued_action_streams(profile:, message:)
    action_logs = profile.instagram_profile_action_logs.recent_first.limit(100)
    [
      turbo_stream.append(
        "notifications",
        partial: "shared/notification",
        locals: { kind: "notice", message: message }
      ),
      turbo_stream.replace(
        "action_history_section",
        partial: "instagram_profiles/action_history_section",
        locals: { action_logs: action_logs }
      )
    ]
  end

  # Creates a "queued" action log, enqueues the given job with the log id
  # attached, then records the ActiveJob id/queue on the log. If enqueueing
  # fails the log is marked failed and the error is re-raised so the
  # calling action renders its alert path.
  def enqueue_profile_job(profile:, action:, job_class:, extra_job_args: {})
    log = profile.instagram_profile_action_logs.create!(
      instagram_account: current_account,
      action: action,
      status: "queued",
      trigger_source: "ui",
      occurred_at: Time.current,
      metadata: { requested_by: "InstagramProfileActionsController" }
    )

    begin
      job = job_class.perform_later(
        instagram_account_id: current_account.id,
        instagram_profile_id: profile.id,
        profile_action_log_id: log.id,
        **extra_job_args
      )

      log.update!(
        active_job_id: job.job_id,
        queue_name: job.queue_name
      )
      annotate_queue_worker_health!(log: log)
    rescue StandardError => e
      log.mark_failed!(error_message: "Queueing failed: #{e.message}")
      raise
    end
  end

  # Best-effort diagnostic: when the Sidekiq adapter is active but no
  # Sidekiq worker process is running, stamp a warning onto the action
  # log's metadata and emit a structured log event. Deliberately swallows
  # all errors — monitoring must never break queueing.
  def annotate_queue_worker_health!(log:)
    return unless Rails.application.config.active_job.queue_adapter.to_s == "sidekiq"

    # Lazy require: sidekiq/api is only needed on this path.
    require "sidekiq/api"
    process_count = Sidekiq::ProcessSet.new.size
    return unless process_count.zero?

    metadata = log.metadata.is_a?(Hash) ? log.metadata : {}
    log.update!(
      metadata: metadata.merge(
        "queue_worker_warning" => "No active Sidekiq worker process detected when job was enqueued.",
        "queue_worker_warning_at" => Time.current.utc.iso8601(3)
      )
    )
    Ops::StructuredLogger.warn(
      event: "jobs.enqueued_without_workers",
      payload: {
        action_log_id: log.id,
        action: log.action,
        active_job_id: log.active_job_id,
        queue_name: log.queue_name
      }
    )
  rescue StandardError
    nil
  end

  # Deletes any debug snapshot files previously captured for the given
  # username from the known tmp debug directories.
  def cleanup_profile_debug_files(username)
    debug_dirs = [
      Rails.root.join("tmp", "story_debug_snapshots"),
      Rails.root.join("tmp", "story_reel_debug")
    ]

    debug_dirs.each do |dir|
      next unless Dir.exist?(dir)

      # Remove files matching the username pattern
      Dir.glob(File.join(dir, "#{username}_*")).each do |file|
        File.delete(file) if File.exist?(file)
      end
    end
  end
end
-
# Creates outgoing direct messages for a profile of the current account
# and hands delivery off to a background job.
class InstagramProfileMessagesController < ApplicationController
  before_action :require_current_account!

  # Persists a queued outgoing message, enqueues delivery, and responds
  # over HTML or Turbo Stream. Failures (blank body, missing profile,
  # validation errors) are turned into an alert response.
  def create
    recipient = current_account.instagram_profiles.find(params[:instagram_profile_id])
    message_body = params.dig(:instagram_message, :body).to_s.strip
    raise "Message cannot be blank" if message_body.blank?

    queued_message = current_account.instagram_messages.create!(
      instagram_profile: recipient,
      direction: "outgoing",
      body: message_body,
      status: "queued"
    )

    SendInstagramMessageJob.perform_later(instagram_account_id: current_account.id, instagram_message_id: queued_message.id)

    respond_to do |format|
      format.html { redirect_to instagram_profile_path(recipient), notice: "Message queued for delivery." }
      format.turbo_stream do
        render turbo_stream: [
          turbo_stream.prepend("messages", partial: "instagram_messages/row", locals: { message: queued_message }),
          turbo_stream.replace("message_form", partial: "instagram_messages/form", locals: { profile: recipient, message: recipient.instagram_messages.new })
        ]
      end
    end
  rescue StandardError => e
    respond_to do |format|
      format.html { redirect_to instagram_profile_path(params[:instagram_profile_id]), alert: "Send failed: #{e.message}" }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Send failed: #{e.message}" }
        )
      end
    end
  end
end
-
class InstagramProfilePostsController < ApplicationController
-
include ProfilePostPreviewSupport
-
-
before_action :require_current_account!
-
-
def analyze
-
profile = current_account.instagram_profiles.find(params[:instagram_profile_id])
-
post = profile.instagram_profile_posts.find(params[:id])
-
-
if analysis_in_progress?(post)
-
message = "Post analysis already running for #{post.shortcode}."
-
respond_to do |format|
-
format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: message }
-
format.turbo_stream do
-
profile_posts = profile.instagram_profile_posts.includes(:instagram_profile_post_comments, :ai_analyses, { instagram_post_faces: :instagram_story_person }, media_attachment: :blob, preview_image_attachment: :blob).recent_first.limit(100)
-
render turbo_stream: [
-
turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: "notice", message: message }
-
),
-
turbo_stream.replace(
-
"captured_profile_posts_section",
-
partial: "instagram_profiles/captured_posts_section",
-
locals: {
-
profile: profile,
-
profile_posts: profile_posts
-
}
-
)
-
]
-
end
-
format.json { render json: { message: message }, status: :accepted }
-
end
-
return
-
end
-
-
post.update!(ai_status: "pending", analyzed_at: nil)
-
-
task_flags = {
-
analyze_visual: boolean_param(params[:analyze_visual], default: true),
-
analyze_faces: boolean_param(params[:analyze_faces], default: true),
-
run_ocr: boolean_param(params[:run_ocr], default: true),
-
run_video: boolean_param(params[:run_video], default: true),
-
run_metadata: boolean_param(params[:run_metadata], default: true),
-
generate_comments: boolean_param(params[:generate_comments], default: true),
-
enforce_comment_evidence_policy: boolean_param(params[:enforce_comment_evidence_policy], default: false),
-
retry_on_incomplete_profile: boolean_param(params[:retry_on_incomplete_profile], default: false)
-
}
-
-
AnalyzeInstagramProfilePostJob.perform_later(
-
instagram_account_id: current_account.id,
-
instagram_profile_id: profile.id,
-
instagram_profile_post_id: post.id,
-
task_flags: task_flags
-
)
-
-
respond_to do |format|
-
format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "Post analysis queued for #{post.shortcode}." }
-
format.turbo_stream do
-
profile_posts = profile.instagram_profile_posts.includes(:instagram_profile_post_comments, :ai_analyses, { instagram_post_faces: :instagram_story_person }, media_attachment: :blob, preview_image_attachment: :blob).recent_first.limit(100)
-
render turbo_stream: [
-
turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: "notice", message: "Post analysis queued for #{post.shortcode}." }
-
),
-
turbo_stream.replace(
-
"captured_profile_posts_section",
-
partial: "instagram_profiles/captured_posts_section",
-
locals: {
-
profile: profile,
-
profile_posts: profile_posts
-
}
-
)
-
]
-
end
-
format.json { head :accepted }
-
end
-
rescue StandardError => e
-
respond_to do |format|
-
format.html { redirect_back fallback_location: instagram_profile_path(params[:instagram_profile_id]), alert: "Unable to queue post analysis: #{e.message}" }
-
format.turbo_stream do
-
render turbo_stream: turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: "alert", message: "Unable to queue post analysis: #{e.message}" }
-
)
-
end
-
format.json { render json: { error: e.message }, status: :unprocessable_entity }
-
end
-
end
-
-
def analyze_next_batch
-
profile = current_account.instagram_profiles.find(params[:instagram_profile_id])
-
offset = params[:offset].to_i || 50
-
batch_size = 10
-
-
# Find unanalyzed posts starting from the offset
-
unanalyzed_posts = profile.instagram_profile_posts
-
.where.not(ai_status: "analyzed")
-
.or(profile.instagram_profile_posts.where(ai_status: nil))
-
.order(:taken_at)
-
.offset(offset)
-
.limit(batch_size)
-
-
if unanalyzed_posts.empty?
-
message = "No more posts to analyze."
-
respond_to do |format|
-
format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: message }
-
format.turbo_stream do
-
render turbo_stream: turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: "notice", message: message }
-
)
-
end
-
format.json { render json: { message: message }, status: :ok }
-
end
-
return
-
end
-
-
# Create action log for this batch
-
action_log = profile.instagram_profile_action_logs.create!(
-
instagram_account: current_account,
-
action: "analyze_profile_posts_batch",
-
status: "queued",
-
trigger_source: "ui",
-
occurred_at: Time.current,
-
metadata: {
-
requested_by: "InstagramProfilePostsController",
-
offset: offset,
-
batch_size: batch_size,
-
post_ids: unanalyzed_posts.pluck(:id),
-
analysis_batch: "next_#{batch_size}_from_#{offset}"
-
}
-
)
-
-
# Queue analysis job
-
job = AnalyzeCapturedInstagramProfilePostsJob.perform_later(
-
instagram_account_id: current_account.id,
-
instagram_profile_id: profile.id,
-
profile_action_log_id: action_log.id,
-
post_ids: unanalyzed_posts.pluck(:id),
-
refresh_profile_insights: false
-
)
-
action_log.update!(active_job_id: job.job_id, queue_name: job.queue_name)
-
-
message = "Queued analysis for next #{unanalyzed_posts.length} posts (posts #{offset + 1}-#{offset + unanalyzed_posts.length})."
-
respond_to do |format|
-
format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: message }
-
format.turbo_stream do
-
profile_posts = profile.instagram_profile_posts.includes(:instagram_profile_post_comments, :ai_analyses, { instagram_post_faces: :instagram_story_person }, media_attachment: :blob, preview_image_attachment: :blob).recent_first.limit(100)
-
render turbo_stream: [
-
turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: "notice", message: message }
-
),
-
turbo_stream.replace(
-
"captured_profile_posts_section",
-
partial: "instagram_profiles/captured_posts_section",
-
locals: {
-
profile: profile,
-
profile_posts: profile_posts
-
}
-
)
-
]
-
end
-
format.json { render json: { message: message, job_id: job.job_id }, status: :accepted }
-
end
-
rescue StandardError => e
-
respond_to do |format|
-
format.html { redirect_back fallback_location: instagram_profile_path(params[:instagram_profile_id]), alert: "Unable to queue batch analysis: #{e.message}" }
-
format.turbo_stream do
-
render turbo_stream: turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: "alert", message: "Unable to queue batch analysis: #{e.message}" }
-
)
-
end
-
format.json { render json: { error: e.message }, status: :unprocessable_entity }
-
end
-
end
-
-
def forward_comment
-
profile = current_account.instagram_profiles.find(params[:instagram_profile_id])
-
post = profile.instagram_profile_posts.find(params[:id])
-
comment_text = params[:comment].to_s.strip
-
raise "Comment cannot be blank" if comment_text.blank?
-
-
media_id = post.metadata.is_a?(Hash) ? post.metadata["media_id"].to_s.strip : ""
-
raise "Media id missing for this post. Re-run profile analysis to refresh post metadata." if media_id.blank?
-
-
action_log = profile.instagram_profile_action_logs.create!(
-
instagram_account: current_account,
-
action: "post_comment",
-
status: "queued",
-
trigger_source: "ui",
-
occurred_at: Time.current,
-
metadata: {
-
requested_by: "InstagramProfilePostsController",
-
post_shortcode: post.shortcode,
-
media_id: media_id,
-
comment_text: comment_text
-
}
-
)
-
-
job = PostInstagramProfileCommentJob.perform_later(
-
instagram_account_id: current_account.id,
-
instagram_profile_id: profile.id,
-
instagram_profile_post_id: post.id,
-
comment_text: comment_text,
-
media_id: media_id,
-
profile_action_log_id: action_log.id
-
)
-
-
action_log.update!(active_job_id: job.job_id, queue_name: job.queue_name)
-
-
respond_to do |format|
-
format.html { redirect_back fallback_location: instagram_profile_path(profile), notice: "Comment queued for #{post.shortcode}." }
-
format.turbo_stream do
-
action_logs = profile.instagram_profile_action_logs.recent_first.limit(100)
-
render turbo_stream: [
-
turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: "notice", message: "Comment queued for #{post.shortcode}." }
-
),
-
turbo_stream.replace(
-
"action_history_section",
-
partial: "instagram_profiles/action_history_section",
-
locals: { action_logs: action_logs }
-
)
-
]
-
end
-
format.json { head :accepted }
-
end
-
rescue StandardError => e
-
respond_to do |format|
-
format.html { redirect_back fallback_location: instagram_profile_path(params[:instagram_profile_id]), alert: "Unable to queue comment: #{e.message}" }
-
format.turbo_stream do
-
render turbo_stream: turbo_stream.append(
-
"notifications",
-
partial: "shared/notification",
-
locals: { kind: "alert", message: "Unable to queue comment: #{e.message}" }
-
)
-
end
-
format.json { render json: { error: e.message }, status: :unprocessable_entity }
-
end
-
end
-
-
private
-
-
def boolean_param(value, default:)
-
return default if value.nil?
-
-
ActiveModel::Type::Boolean.new.cast(value)
-
end
-
-
# True while the post's metadata reports an AI pipeline run still in flight:
# the "ai_pipeline" status is "running" and at least one required step has
# not reached a terminal status. Malformed metadata — or any error during
# inspection — counts as "not in progress".
def analysis_in_progress?(post)
  metadata = post.metadata
  return false unless metadata.is_a?(Hash)

  pipeline = metadata["ai_pipeline"]
  return false unless pipeline.is_a?(Hash)
  return false unless pipeline["status"].to_s == "running"

  required_steps = Array(pipeline["required_steps"]).map(&:to_s)
  return false if required_steps.empty?

  terminal_statuses = Ai::PostAnalysisPipelineState::TERMINAL_STATUSES
  required_steps.any? do |step|
    !terminal_statuses.include?(pipeline.dig("steps", step, "status").to_s)
  end
rescue StandardError
  false
end
-
end
-
# Per-account Instagram profile UI: index/show pages, lazily loaded
# turbo-frame sections, a Tabulator-compatible events feed, and tag
# management for a single profile.
class InstagramProfilesController < ApplicationController
  include ProfilePostPreviewSupport

  before_action :require_current_account!
  before_action :set_account_and_profile!, only: %i[
    show
    events
    tags
    captured_posts_section
    downloaded_stories_section
    messages_section
    action_history_section
    events_table_section
  ]

  # GET /instagram_profiles — HTML renders the filter UI; JSON returns a
  # Tabulator-compatible page of profiles.
  def index
    @account = current_account

    query_result = InstagramProfiles::ProfilesIndexQuery.new(account: @account, params: params).call
    @q = query_result.q
    @filter = query_result.filter
    @page = query_result.page
    @per_page = query_result.per_page
    @total = query_result.total
    @pages = query_result.pages
    @profiles = query_result.profiles

    @latest_sync_run = @account.sync_runs.order(created_at: :desc).first
    # Headline counts shown above the table (mutuals = following AND follower).
    @counts = {
      total: @account.instagram_profiles.count,
      mutuals: @account.instagram_profiles.where(following: true, follows_you: true).count,
      following: @account.instagram_profiles.where(following: true).count,
      followers: @account.instagram_profiles.where(follows_you: true).count
    }

    respond_to do |format|
      format.html
      format.json do
        render json: InstagramProfiles::TabulatorProfilesPayloadBuilder.new(
          profiles: @profiles,
          total: @total,
          pages: @pages,
          view_context: view_context
        ).call
      end
    end
  end

  # GET /instagram_profiles/:id — copies the snapshot service's counts and
  # latest-analysis state into ivars for the detail template.
  def show
    snapshot = InstagramProfiles::ShowSnapshotService.new(account: @account, profile: @profile).call

    @profile_posts_total_count = snapshot[:profile_posts_total_count]
    @deleted_posts_count = snapshot[:deleted_posts_count]
    @active_posts_count = snapshot[:active_posts_count]
    @analyzed_posts_count = snapshot[:analyzed_posts_count]
    @pending_posts_count = snapshot[:pending_posts_count]
    @messages_count = snapshot[:messages_count]
    @action_logs_count = snapshot[:action_logs_count]
    @new_message = @profile.instagram_messages.new
    @latest_analysis = snapshot[:latest_analysis]
    @latest_story_intelligence_event = snapshot[:latest_story_intelligence_event]
    @available_tags = snapshot[:available_tags]
    @history_build_state = snapshot[:history_build_state]
    @history_ready = snapshot[:history_ready]
    @mutual_profiles = snapshot[:mutual_profiles]
  end

  # Turbo-frame: newest captured posts with media/comments eager-loaded.
  def captured_posts_section
    profile_posts =
      @profile.instagram_profile_posts
              .includes(:instagram_profile_post_comments, :ai_analyses, { instagram_post_faces: :instagram_story_person }, media_attachment: :blob, preview_image_attachment: :blob)
              .recent_first
              .limit(40)

    render_profile_frame(
      frame_id: "profile_captured_posts_#{@profile.id}",
      partial: "instagram_profiles/captured_posts_section",
      locals: { profile: @profile, profile_posts: profile_posts }
    )
  end

  # Turbo-frame: story-archive events that still have media attached.
  def downloaded_stories_section
    downloaded_story_events =
      @profile.instagram_profile_events
              .joins(:media_attachment)
              .with_attached_media
              .with_attached_preview_image
              .where(kind: InstagramProfileEvent::STORY_ARCHIVE_EVENT_KINDS)
              .order(detected_at: :desc, id: :desc)
              .limit(18)

    render_profile_frame(
      frame_id: "profile_downloaded_stories_#{@profile.id}",
      partial: "instagram_profiles/downloaded_stories_section",
      locals: { profile: @profile, downloaded_story_events: downloaded_story_events }
    )
  end

  # Turbo-frame: recent direct messages for this profile.
  def messages_section
    messages = @profile.instagram_messages.recent_first.limit(120)
    render_profile_frame(
      frame_id: "profile_messages_#{@profile.id}",
      partial: "instagram_profiles/messages_section",
      locals: { messages: messages }
    )
  end

  # Turbo-frame: recent action-log entries for this profile.
  def action_history_section
    action_logs = @profile.instagram_profile_action_logs.recent_first.limit(100)
    render_profile_frame(
      frame_id: "profile_actions_#{@profile.id}",
      partial: "instagram_profiles/action_history_section",
      locals: { action_logs: action_logs }
    )
  end

  # Turbo-frame: events table shell; rows are fetched via #events (JSON).
  def events_table_section
    render_profile_frame(
      frame_id: "profile_events_table_#{@profile.id}",
      partial: "instagram_profiles/events_table_section",
      locals: { profile: @profile }
    )
  end

  # JSON feed backing the Tabulator events table.
  def events
    query_result = InstagramProfiles::EventsQuery.new(profile: @profile, params: params).call

    render json: InstagramProfiles::TabulatorEventsPayloadBuilder.new(
      events: query_result.events,
      total: query_result.total,
      pages: query_result.pages,
      view_context: view_context
    ).call
  end

  # Replaces the profile's tag set with the union of checkbox tag names and
  # free-form comma/newline-separated custom tags, normalized to lowercase.
  # Tags are created on demand; errors redirect/stream an alert.
  def tags
    names = Array(params[:tag_names]).map { |tag| tag.to_s.strip.downcase }.reject(&:blank?)
    custom = params[:custom_tags].to_s.split(/[,\n]/).map { |tag| tag.to_s.strip.downcase }.reject(&:blank?)
    desired = (names + custom).uniq

    tags = desired.map { |name| ProfileTag.find_or_create_by!(name: name) }

    @profile.profile_tags = tags
    @profile.save!

    respond_to do |format|
      format.html { redirect_to instagram_profile_path(@profile), notice: "Tags updated." }
      format.turbo_stream do
        render turbo_stream: [
          turbo_stream.append(
            "notifications",
            partial: "shared/notification",
            locals: { kind: "notice", message: "Tags updated." }
          ),
          turbo_stream.replace(
            "profile_tags_section",
            partial: "instagram_profiles/profile_tags_section",
            locals: {
              profile: @profile,
              available_tags: InstagramProfiles::ShowSnapshotService::AVAILABLE_TAGS
            }
          )
        ]
      end
    end
  rescue StandardError => e
    respond_to do |format|
      format.html { redirect_to instagram_profile_path(params[:id]), alert: "Unable to update tags: #{e.message}" }
      format.turbo_stream do
        render turbo_stream: turbo_stream.append(
          "notifications",
          partial: "shared/notification",
          locals: { kind: "alert", message: "Unable to update tags: #{e.message}" }
        )
      end
    end
  end

  private

  # Scopes the profile lookup to the signed-in account (RecordNotFound for
  # foreign ids).
  def set_account_and_profile!
    @account = current_account
    @profile = @account.instagram_profiles.find(params[:id])
  end

  # Renders a section partial either wrapped in its turbo-frame tag (for
  # frame requests) or as bare HTML, so sections work in both contexts.
  def render_profile_frame(frame_id:, partial:, locals:)
    body = render_to_string(partial: partial, locals: locals)
    if turbo_frame_request?
      render html: view_context.turbo_frame_tag(frame_id) { body.html_safe }
    else
      render html: body.html_safe
    end
  end
end
-
# Manages person identities detected across a profile's posts and stories:
# shows grouped appearances and applies human feedback (confirm, mark
# incorrect, link as owner, merge, separate a single detection).
class InstagramStoryPeopleController < ApplicationController
  before_action :require_current_account!
  before_action :set_profile
  before_action :set_person, only: %i[
    show
    confirm
    mark_incorrect
    link_profile_owner
    merge
    separate_face
  ]
  before_action :set_feedback_service, only: %i[
    confirm
    mark_incorrect
    link_profile_owner
    merge
    separate_face
  ]

  # Detail page: appearances grouped by owning post/story, plus merge
  # candidates from recently seen people on the same profile.
  def show
    @post_groups = grouped_post_faces(@person)
    @story_groups = grouped_story_faces(@person)
    @post_face_count = @person.instagram_post_faces.count
    @story_face_count = @person.instagram_story_faces.count
    @total_appearances = @post_face_count + @story_face_count
    @merge_candidates = @profile.instagram_story_people.recently_seen.where.not(id: @person.id).limit(80)
  end

  # Confirms the identity with an optional label and real-person status.
  def confirm
    @feedback_service.confirm_person!(
      person: @person,
      label: params[:label],
      real_person_status: params[:real_person_status]
    )
    redirect_to person_path(@person), notice: "Identity confirmed for #{@person.display_label}."
  rescue StandardError => e
    redirect_to person_path(@person), alert: "Unable to confirm identity: #{e.message}"
  end

  # Flags the identity as incorrectly clustered.
  def mark_incorrect
    @feedback_service.mark_incorrect!(
      person: @person,
      reason: params[:reason]
    )
    redirect_to person_path(@person), notice: "#{@person.display_label} was marked as incorrect."
  rescue StandardError => e
    redirect_to person_path(@person), alert: "Unable to mark person as incorrect: #{e.message}"
  end

  # Links this person as the owner of the profile.
  def link_profile_owner
    @feedback_service.link_profile_owner!(person: @person)
    redirect_to person_path(@person), notice: "#{@person.display_label} is now linked as the profile owner."
  rescue StandardError => e
    redirect_to person_path(@person), alert: "Unable to set profile owner link: #{e.message}"
  end

  # Merges this person's detections into another person on the same profile.
  def merge
    target_person = @profile.instagram_story_people.find(params[:target_person_id])
    @feedback_service.merge_people!(source_person: @person, target_person: target_person)
    redirect_to person_path(target_person), notice: "Merged #{@person.display_label} into #{target_person.display_label}."
  rescue StandardError => e
    redirect_to person_path(@person), alert: "Unable to merge identities: #{e.message}"
  end

  # Splits one face detection off into a brand-new person record.
  def separate_face
    face = find_face!(params[:face_type], params[:face_id])
    new_person = @feedback_service.separate_face!(person: @person, face: face)
    redirect_to person_path(new_person), notice: "Created #{new_person.display_label} from a separated detection."
  rescue StandardError => e
    redirect_to person_path(@person), alert: "Unable to separate detection: #{e.message}"
  end

  private

  def set_profile
    @profile = current_account.instagram_profiles.find(params[:instagram_profile_id])
  end

  def set_person
    @person = @profile.instagram_story_people.find(params[:id])
  end

  def set_feedback_service
    @feedback_service = PersonIdentityFeedbackService.new
  end

  def person_path(person)
    instagram_profile_instagram_story_person_path(@profile, person)
  end

  # Looks up a story face ("story") or post face (anything else), always
  # scoped to the current profile; raises RecordNotFound otherwise.
  def find_face!(face_type, face_id)
    token = face_type.to_s.strip
    id = face_id.to_i
    raise ActiveRecord::RecordNotFound, "Face id missing" unless id.positive?

    if token == "story"
      InstagramStoryFace
        .joins(:instagram_story)
        .where(instagram_stories: { instagram_profile_id: @profile.id })
        .find(id)
    else
      InstagramPostFace
        .joins(:instagram_profile_post)
        .where(instagram_profile_posts: { instagram_profile_id: @profile.id })
        .find(id)
    end
  end

  # Up to 240 newest post-face detections, grouped by owning post.
  def grouped_post_faces(person)
    faces = person.instagram_post_faces
                  .includes(instagram_profile_post: [ media_attachment: :blob, preview_image_attachment: :blob ])
                  .order(created_at: :desc)
                  .limit(240)
                  .to_a

    grouped_faces(
      faces: faces,
      owner_key: :instagram_profile_post_id,
      count_rows: InstagramPostFace
        .where(instagram_profile_post_id: faces.map(&:instagram_profile_post_id).uniq)
        .where.not(instagram_story_person_id: nil)
        .pluck(:instagram_profile_post_id, :instagram_story_person_id)
    )
  end

  # Up to 240 newest story-face detections, grouped by owning story.
  def grouped_story_faces(person)
    faces = person.instagram_story_faces
                  .includes(instagram_story: [ media_attachment: :blob ])
                  .order(created_at: :desc)
                  .limit(240)
                  .to_a

    grouped_faces(
      faces: faces,
      owner_key: :instagram_story_id,
      count_rows: InstagramStoryFace
        .where(instagram_story_id: faces.map(&:instagram_story_id).uniq)
        .where.not(instagram_story_person_id: nil)
        .pluck(:instagram_story_id, :instagram_story_person_id)
    )
  end

  # Groups face rows by their owning record and annotates each group with
  # how many distinct people appear on that owner. Returns hashes sorted
  # newest-first by occurred_at (nil sorts last via the Time.at(0) floor).
  def grouped_faces(faces:, owner_key:, count_rows:)
    return [] if faces.empty?

    # owner_id => number of distinct people detected on that owner.
    people_count_by_owner = count_rows
      .group_by(&:first)
      .transform_values { |rows| rows.map(&:last).uniq.size }

    faces
      .group_by(&owner_key)
      .map do |owner_id, row_faces|
        # e.g. :instagram_story_id -> the loaded #instagram_story association.
        owner = row_faces.first.public_send(owner_key.to_s.sub(/_id\z/, ""))
        next unless owner

        total_people = people_count_by_owner[owner_id].to_i
        {
          owner: owner,
          faces: row_faces.first(8),
          face_count_for_person: row_faces.length,
          total_people: total_people,
          scope: total_people > 1 ? "multiple_people" : "single_person",
          occurred_at: owner.respond_to?(:taken_at) ? owner.taken_at : nil
        }
      end
      .compact
      .sort_by { |row| [ row[:occurred_at] || Time.at(0), row[:owner].id ] }
      .reverse
  end
end
-
# Workspace "actions" dashboard: renders the to-do queue for the selected
# account plus a partial-only refresh endpoint used by live polling.
class WorkspacesController < ApplicationController
  include ProfilePostPreviewSupport

  before_action :require_current_account!

  # Default number of queue items when ?limit= is not supplied.
  DEFAULT_QUEUE_LIMIT = 40

  def actions
    @account = resolved_account
    @queue_result = load_queue_result(account: @account)
  end

  # Renders only the queue section partial; failures return a small error
  # fragment with 422 so the client-side refresher can surface it.
  def actions_feed
    account = resolved_account
    queue_result = load_queue_result(account: account)

    render partial: "workspaces/actions_queue_section", locals: { account: account, queue_result: queue_result }
  rescue StandardError => e
    render html: view_context.content_tag(:p, "Unable to refresh workspace queue: #{e.message}", class: "meta"), status: :unprocessable_entity
  end

  private

  # Honors ?instagram_account_id= when it resolves; any failure (missing
  # record, bad id) falls back to the session's current account.
  def resolved_account
    requested_id = params[:instagram_account_id].to_i
    return current_account if requested_id <= 0

    current_account.id == requested_id ? current_account : current_account.class.find(requested_id)
  rescue StandardError
    current_account
  end

  # Fetches the to-do queue, allowing the service to enqueue processing
  # jobs for stale items as a side effect.
  def load_queue_result(account:)
    Workspace::ActionsTodoQueueService.new(
      account: account,
      limit: params.fetch(:limit, DEFAULT_QUEUE_LIMIT),
      enqueue_processing: true
    ).fetch!
  end
end
-
# View helpers for the AI dashboard.
module AiDashboardHelper
  # Default smoke-test name for a given AI service key; unknown services
  # fall back to 'basic'. Accepts strings or symbols.
  # NOTE(review): duplicated in ApplicationHelper — consider consolidating.
  def get_default_test_for_service(service)
    case service.to_s
    when 'vision'  then 'labels'
    when 'face'    then 'detection'
    when 'ocr'     then 'text_extraction'
    when 'whisper' then 'transcription'
    when 'video'   then 'analysis'
    else 'basic'
    end
  end
end
-
# App-wide view helpers: relative timestamps, top-navigation links, and
# section detection for highlighting the active nav entry.
module ApplicationHelper
  # Renders a <time> tag with a humanized relative time ("3 days ago" /
  # "in 2 hours") and the absolute timestamp as a hover tooltip.
  # Blank values render the +blank+ placeholder instead.
  def relative_time_with_tooltip(value, blank: "-")
    return blank if value.blank?

    time = value.in_time_zone
    relative =
      if time <= Time.current
        "#{time_ago_in_words(time)} ago"
      else
        "in #{time_ago_in_words(time)}"
      end

    content_tag(
      :time,
      relative,
      datetime: time.iso8601,
      title: time.strftime("%Y-%m-%d %H:%M:%S %Z")
    )
  end

  # link_to wrapper that adds the "active" class and aria-current="page"
  # when +section+ matches the current controller/action. Supports the
  # block form (path passed first, label captured from the block).
  def top_nav_link_to(name = nil, path = nil, section:, **options, &block)
    if block_given?
      path = name
      name = capture(&block)
    end

    active = top_nav_active?(section)
    classes = [ "nav-link", options.delete(:class) ]
    classes << "active" if active

    aria_options = (options.delete(:aria) || {}).dup
    aria_options[:current] = "page" if active

    link_to name, path, **options.merge(class: classes.compact.join(" "), aria: aria_options)
  end

  # Default smoke-test name for a given AI service key; unknown services
  # fall back to 'basic'.
  # NOTE(review): duplicated in AiDashboardHelper — consider consolidating.
  def get_default_test_for_service(service)
    case service.to_s
    when 'vision'  then 'labels'
    when 'face'    then 'detection'
    when 'ocr'     then 'text_extraction'
    when 'whisper' then 'transcription'
    when 'video'   then 'analysis'
    else 'basic'
    end
  end

  # Alias used by views; the route helper is generated as *_index_path.
  def ai_dashboard_path
    ai_dashboard_index_path
  end

  # Maps the current controller (and, for admin job pages, action/path) to
  # a nav-section symbol; nil when no section applies.
  def current_section
    case controller_path
    when "instagram_accounts"
      :accounts
    when "instagram_profiles", "instagram_profile_actions", "instagram_profile_posts", "instagram_profile_messages", "instagram_story_people"
      :profiles
    when "instagram_posts"
      :posts
    when "workspaces"
      :workspace_actions
    when "ai_dashboard"
      :ai_dashboard
    when "admin/background_jobs"
      # The jobs controller backs two nav entries: the dashboard and the
      # failure log. Anything else under it highlights no section.
      if action_name == "dashboard" || request.path.start_with?("/admin/jobs")
        :jobs
      elsif %w[failures failure].include?(action_name)
        :failures
      end
    when "admin/issues"
      :issues
    when "admin/storage_ingestions"
      :storage
    else
      nil
    end
  end

  private

  # True when the given nav section should be highlighted for the current
  # request (mirrors #current_section but driven from the section side).
  def top_nav_active?(section)
    case section
    when :accounts
      controller_path == "instagram_accounts"
    when :profiles
      %w[
        instagram_profiles
        instagram_profile_actions
        instagram_profile_posts
        instagram_profile_messages
        instagram_story_people
      ].include?(controller_path)
    when :posts
      controller_path == "instagram_posts"
    when :workspace_actions
      controller_path == "workspaces"
    when :ai_dashboard
      controller_path == "ai_dashboard"
    when :jobs
      request.path.start_with?("/admin/jobs") || (controller_path == "admin/background_jobs" && action_name == "dashboard")
    when :failures
      controller_path == "admin/background_jobs" && %w[failures failure].include?(action_name)
    when :issues
      controller_path == "admin/issues"
    when :storage
      controller_path == "admin/storage_ingestions"
    else
      false
    end
  end
end
-
# View helpers for the dashboard (none yet).
module DashboardHelper
end
-
# View helpers for Instagram account pages (none yet).
module InstagramAccountsHelper
end
-
# View helpers for message pages (none yet).
module MessagesHelper
end
-
# Analyzes a profile's captured posts in batches. Each execution runs up
# to +batch_size+ inline analyses, then re-enqueues itself with the
# remaining ids; cumulative progress lives in the shared action log's
# "analysis_queue_state" metadata so the chain survives across jobs.
class AnalyzeCapturedInstagramProfilePostsJob < ApplicationJob
  queue_as :ai

  DEFAULT_BATCH_SIZE = 6
  # Upper bound on per-execution work regardless of the caller's request.
  MAX_BATCH_SIZE = 20

  # @param post_ids [Array, nil] explicit posts to analyze; nil selects up
  #   to 200 pending-AI posts automatically.
  # @param refresh_profile_insights [Boolean-ish] when truthy and anything
  #   was analyzed, enqueue a whole-profile re-analysis at the end.
  # @param total_candidates [Integer, nil] original queue size, threaded
  #   through re-enqueues for accurate progress reporting.
  def perform(
    instagram_account_id:,
    instagram_profile_id:,
    profile_action_log_id: nil,
    post_ids: nil,
    batch_size: DEFAULT_BATCH_SIZE,
    refresh_profile_insights: true,
    total_candidates: nil
  )
    account = InstagramAccount.find(instagram_account_id)
    profile = account.instagram_profiles.find(instagram_profile_id)
    action_log = find_or_create_action_log(
      account: account,
      profile: profile,
      profile_action_log_id: profile_action_log_id
    )

    ids = normalize_post_ids(profile: profile, post_ids: post_ids)
    if ids.empty?
      action_log.mark_succeeded!(
        extra_metadata: { skipped: true, reason: "no_candidate_posts", queue_name: queue_name, active_job_id: job_id },
        log_text: "No candidate posts required analysis."
      )
      return
    end

    # Policy gate: some profiles (e.g. too many followers) skip analysis
    # entirely; mark each candidate post so it is not re-selected.
    policy_decision = Instagram::ProfileScanPolicy.new(profile: profile).decision
    if policy_decision[:skip_post_analysis]
      mark_posts_as_policy_skipped!(profile: profile, ids: ids, decision: policy_decision)
      action_log.mark_succeeded!(
        extra_metadata: {
          skipped: true,
          reason: "profile_scan_policy_blocked",
          skip_reason_code: policy_decision[:reason_code],
          skip_reason: policy_decision[:reason],
          followers_count: policy_decision[:followers_count],
          max_followers: policy_decision[:max_followers],
          skipped_posts_count: ids.length
        },
        log_text: "Skipped post analysis: #{policy_decision[:reason]}"
      )
      return
    end

    batch_size_i = batch_size.to_i.clamp(1, MAX_BATCH_SIZE)
    total_candidates_i = total_candidates.to_i.positive? ? total_candidates.to_i : ids.length
    current_batch_ids = ids.first(batch_size_i)
    remaining_ids = ids.drop(batch_size_i)

    action_log.mark_running!(
      extra_metadata: {
        queue_name: queue_name,
        active_job_id: job_id,
        batch_size: batch_size_i,
        current_batch_count: current_batch_ids.length,
        remaining_count: remaining_ids.length
      }
    )

    analyzed_now = 0
    skipped_now = 0
    failed_now = []

    current_batch_ids.each do |post_id|
      post = profile.instagram_profile_posts.find_by(id: post_id)
      next unless post

      # Already analyzed: count as skipped rather than re-running.
      if post.ai_status.to_s == "analyzed" && post.analyzed_at.present?
        skipped_now += 1
        next
      end

      AnalyzeInstagramProfilePostJob.perform_now(
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        instagram_profile_post_id: post.id,
        pipeline_mode: "inline",
        task_flags: {
          generate_comments: false,
          enforce_comment_evidence_policy: false,
          retry_on_incomplete_profile: false
        }
      )
      analyzed_now += 1
    rescue StandardError => e
      # One failing post must not abort the batch; record and continue.
      failed_now << {
        post_id: post_id,
        shortcode: post&.shortcode.to_s.presence,
        error_class: e.class.name,
        error_message: e.message.to_s.byteslice(0, 220)
      }.compact
      next
    end

    state = merged_queue_state(
      action_log: action_log,
      total_candidates: total_candidates_i,
      processed_increment: current_batch_ids.length,
      analyzed_increment: analyzed_now,
      skipped_increment: skipped_now,
      failed_rows: failed_now,
      remaining_count: remaining_ids.length
    )

    if remaining_ids.any?
      # Chain the next batch and hand the action log over to it.
      next_job = self.class.perform_later(
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        profile_action_log_id: action_log.id,
        post_ids: remaining_ids,
        batch_size: batch_size_i,
        refresh_profile_insights: refresh_profile_insights,
        total_candidates: total_candidates_i
      )
      state["next_job_id"] = next_job.job_id
      action_log.mark_running!(extra_metadata: { analysis_queue_state: state, active_job_id: next_job.job_id, queue_name: next_job.queue_name })
      return
    end

    refresh_job = nil
    if ActiveModel::Type::Boolean.new.cast(refresh_profile_insights) && state["analyzed_count"].to_i.positive?
      refresh_job = AnalyzeInstagramProfileJob.perform_later(
        instagram_account_id: account.id,
        instagram_profile_id: profile.id
      )
    end

    action_log.mark_succeeded!(
      extra_metadata: {
        analysis_queue_state: state,
        refresh_profile_insights: ActiveModel::Type::Boolean.new.cast(refresh_profile_insights),
        profile_insights_refresh_job_id: refresh_job&.job_id
      },
      log_text: "Post analysis completed. analyzed=#{state['analyzed_count']}, skipped=#{state['skipped_count']}, failed=#{state['failed_count']}."
    )
  rescue StandardError => e
    action_log&.mark_failed!(error_message: e.message, extra_metadata: { active_job_id: job_id, queue_name: queue_name })
    raise
  end

  private

  # Reuses the chained action log when its id is supplied and still exists;
  # otherwise creates a fresh queued log entry for this run.
  def find_or_create_action_log(account:, profile:, profile_action_log_id:)
    log = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id) if profile_action_log_id.present?
    return log if log

    profile.instagram_profile_action_logs.create!(
      instagram_account: account,
      action: "analyze_profile_posts",
      status: "queued",
      trigger_source: "job",
      occurred_at: Time.current,
      active_job_id: job_id,
      queue_name: queue_name,
      metadata: { created_by: self.class.name }
    )
  end

  # Sanitizes explicit ids (positive integers, deduped); when none given,
  # falls back to the profile's newest pending-AI posts (max 200).
  def normalize_post_ids(profile:, post_ids:)
    ids = Array(post_ids).map(&:to_i).select(&:positive?).uniq
    return ids if ids.any?

    profile.instagram_profile_posts.pending_ai.recent_first.limit(200).pluck(:id)
  end

  # Folds this batch's counters into the state persisted on the action log,
  # capping the stored failure list at 30 rows.
  def merged_queue_state(action_log:, total_candidates:, processed_increment:, analyzed_increment:, skipped_increment:, failed_rows:, remaining_count:)
    metadata = action_log.metadata.is_a?(Hash) ? action_log.metadata : {}
    raw = metadata["analysis_queue_state"].is_a?(Hash) ? metadata["analysis_queue_state"] : {}
    previous_failed_rows = Array(raw["failed_posts"]).select { |row| row.is_a?(Hash) }

    {
      "total_candidates" => [raw["total_candidates"].to_i, total_candidates.to_i].max,
      "processed_count" => raw["processed_count"].to_i + processed_increment.to_i,
      "analyzed_count" => raw["analyzed_count"].to_i + analyzed_increment.to_i,
      "skipped_count" => raw["skipped_count"].to_i + skipped_increment.to_i,
      "failed_count" => raw["failed_count"].to_i + Array(failed_rows).length,
      "remaining_count" => remaining_count.to_i,
      "failed_posts" => (previous_failed_rows + Array(failed_rows)).first(30),
      "updated_at" => Time.current.iso8601
    }
  end

  # Best-effort: tag each candidate post as policy-skipped; individual
  # failures are ignored so the rest of the batch is still marked.
  def mark_posts_as_policy_skipped!(profile:, ids:, decision:)
    profile.instagram_profile_posts.where(id: Array(ids).map(&:to_i).select(&:positive?)).find_each do |post|
      Instagram::ProfileScanPolicy.mark_post_analysis_skipped!(post: post, decision: decision)
    rescue StandardError
      next
    end
  end
end
-
# Runs AI analysis for a single feed InstagramPost: builds the text/media
# payload, invokes Ai::Runner, persists the verdict, and broadcasts a
# Turbo notification. Posts judged irrelevant are marked "ignored" and
# scheduled for purge; failures reset the post to "pending" for retry.
class AnalyzeInstagramPostJob < ApplicationJob
  require "base64"
  require "digest"
  require "uri"

  queue_as :ai

  # Media above these sizes is not inlined into the AI request.
  MAX_INLINE_IMAGE_BYTES = 2 * 1024 * 1024
  MAX_INLINE_VIDEO_BYTES = 10 * 1024 * 1024

  def perform(instagram_post_id:)
    post = InstagramPost.find(instagram_post_id)
    account = post.instagram_account

    # Resolve an existing profile record for tag rules, if available.
    if post.instagram_profile_id.nil? && post.author_username.to_s.strip.present?
      post.instagram_profile = account.instagram_profiles.find_by(username: post.author_username)
      post.save! if post.changed?
    end

    payload = build_payload(post)
    media = build_media_payload(post)
    run = Ai::Runner.new(account: account).analyze!(
      purpose: "post",
      analyzable: post,
      payload: payload,
      media: media,
      media_fingerprint: media_fingerprint_for(post: post, media: media)
    )
    result = run[:result]

    post.update!(
      status: "analyzed",
      analyzed_at: Time.current,
      ai_provider: run[:provider].key,
      ai_model: result[:model],
      analysis: result[:analysis]
    )

    relevant = ActiveModel::Type::Boolean.new.cast(post.analysis&.dig("relevant"))
    unless relevant
      # Irrelevant posts are ignored and purged after a grace period.
      post.update!(status: "ignored", purge_at: 24.hours.from_now)
    end

    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "notice", message: "Post analyzed via #{run[:provider].display_name}: #{post.shortcode} (#{relevant ? 'relevant' : 'ignored'})." }
    )
  rescue StandardError => e
    # `post`/`account` may be unassigned if the initial find raised.
    post ||= InstagramPost.where(id: instagram_post_id).first
    account ||= post&.instagram_account

    post&.update!(status: "pending") # allow retry

    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "alert", message: "Post analysis failed: #{e.message}" }
    ) if account

    raise
  end

  private

  # Textual payload for the AI: post attributes, author profile context
  # (when resolved), and hard tag-based interaction rules.
  def build_payload(post)
    profile = post.instagram_profile

    {
      post: {
        shortcode: post.shortcode,
        kind: post.post_kind,
        author_username: post.author_username,
        caption: post.caption,
        taken_at: post.taken_at&.iso8601,
        detected_at: post.detected_at&.iso8601,
        permalink: post.permalink
      },
      author_profile: profile ? {
        username: profile.username,
        display_name: profile.display_name,
        bio: profile.bio,
        tags: profile.profile_tags.pluck(:name).sort,
        following: profile.following,
        follows_you: profile.follows_you,
        mutual: profile.mutual?
      } : nil,
      rules: {
        # Basic tag-based gates. The AI should treat these as hard preferences.
        ignore_if_tagged: %w[relative page excluded],
        prefer_interact_if_tagged: %w[female_friend male_friend friend personal_user],
        require_manual_review: true
      }
    }
  end

  # Media payload for the AI request. Images within the inline limit are
  # base64-embedded; small videos are passed as raw bytes; everything else
  # degrades to type "none". Any download error also degrades to "none".
  def build_media_payload(post)
    return { type: "none" } unless post.media.attached?

    blob = post.media.blob
    return { type: "none" } unless blob

    content_type = blob.content_type.to_s
    return { type: "none", content_type: content_type } if blob.byte_size.to_i <= 0

    if content_type.start_with?("image/")
      return { type: "image", content_type: content_type } if blob.byte_size.to_i > MAX_INLINE_IMAGE_BYTES

      data = blob.download
      encoded = Base64.strict_encode64(data)

      return {
        type: "image",
        content_type: content_type,
        bytes: data,
        image_data_url: "data:#{content_type};base64,#{encoded}"
      }
    end

    if content_type.start_with?("video/")
      return { type: "none", content_type: content_type, media_skipped_reason: "video_too_large" } if blob.byte_size.to_i > MAX_INLINE_VIDEO_BYTES

      return {
        type: "video",
        content_type: content_type,
        reference_id: "instagram_post_#{post.id}",
        bytes: blob.download
      }
    end

    { type: "none", content_type: content_type }
  rescue StandardError
    { type: "none" }
  end

  # Stable fingerprint for deduplicating analyses of the same media:
  # prefers the blob checksum, then a normalized-URL digest, then a digest
  # of the inline bytes; nil when nothing is available.
  def media_fingerprint_for(post:, media:)
    if post.media.attached?
      checksum = post.media.blob&.checksum.to_s
      return "blob:#{checksum}" if checksum.present?
    end

    normalized_url = normalize_url(post.media_url)
    return Digest::SHA256.hexdigest(normalized_url) if normalized_url.present?

    bytes = media[:bytes]
    return Digest::SHA256.hexdigest(bytes) if bytes.present?

    nil
  end

  # Strips query string/fragment from http(s) URLs (CDN URLs carry volatile
  # signatures); non-HTTP or unparsable values are returned as-is.
  def normalize_url(raw)
    value = raw.to_s.strip
    return nil if value.blank?

    uri = URI.parse(value)
    # URI::HTTPS subclasses URI::HTTP, so this single check covers both.
    return value unless uri.is_a?(URI::HTTP)

    "#{uri.scheme}://#{uri.host}#{uri.path}"
  rescue StandardError
    value
  end
end
-
class AnalyzeInstagramProfileJob < ApplicationJob
-
require "base64"
-
require "digest"
-
-
queue_as :ai
-
MAX_AI_IMAGE_COUNT = 5
-
MAX_PROFILE_IMAGE_DESCRIPTION_COUNT = 5
-
MAX_INLINE_IMAGE_BYTES = 2 * 1024 * 1024
-
-
# Whole-profile AI analysis: policy gate, collect recent posts/comments,
# describe lead images, run Ai::Runner, then fold the resulting
# demographics back onto the profile. Progress is tracked on a profile
# action log; failures broadcast an alert and re-raise for retry.
def perform(instagram_account_id:, instagram_profile_id:, profile_action_log_id: nil)
  account = InstagramAccount.find(instagram_account_id)
  profile = account.instagram_profiles.find(instagram_profile_id)
  action_log = find_or_create_action_log(
    account: account,
    profile: profile,
    action: "analyze_profile",
    profile_action_log_id: profile_action_log_id
  )
  action_log.mark_running!(extra_metadata: { queue_name: queue_name, active_job_id: job_id })

  policy_decision = Instagram::ProfileScanPolicy.new(profile: profile).decision
  if policy_decision[:skip_post_analysis]
    # Pages and tag-excluded profiles are excluded from future scans too.
    if policy_decision[:reason_code].to_s == "non_personal_profile_page" || policy_decision[:reason_code].to_s == "scan_excluded_tag"
      Instagram::ProfileScanPolicy.mark_scan_excluded!(profile: profile)
    end

    action_log.mark_succeeded!(
      extra_metadata: {
        skipped: true,
        reason: "profile_scan_policy_blocked",
        skip_reason_code: policy_decision[:reason_code],
        skip_reason: policy_decision[:reason],
        followers_count: policy_decision[:followers_count],
        max_followers: policy_decision[:max_followers]
      },
      log_text: "Skipped profile AI analysis: #{policy_decision[:reason]}"
    )
    return
  end

  collected = Instagram::ProfileAnalysisCollector.new(account: account, profile: profile).collect_and_persist!(
    posts_limit: nil,
    comments_limit: 20
  )
  described_posts = enrich_first_profile_images!(account: account, profile: profile, collected_posts: collected[:posts])
  accepted_media_context = build_accepted_media_context(profile: profile)

  payload = build_profile_payload(
    profile: profile,
    collected_posts: collected[:posts],
    described_posts: described_posts,
    accepted_media_context: accepted_media_context
  )
  media = build_media_inputs(profile: profile, collected_posts: described_posts)

  run = Ai::Runner.new(account: account).analyze!(
    purpose: "profile",
    analyzable: profile,
    payload: payload,
    media: media
  )
  update_profile_demographics_from_analysis!(profile: profile, analysis: run.dig(:result, :analysis))
  aggregate_demographics_from_accumulated_json!(
    account: account,
    profile: profile,
    latest_profile_analysis: run.dig(:result, :analysis),
    accepted_media_context: accepted_media_context
  )

  Turbo::StreamsChannel.broadcast_append_to(
    account,
    target: "notifications",
    partial: "shared/notification",
    locals: { kind: "notice", message: "AI analysis completed for #{profile.username} via #{run[:provider].display_name}." }
  )
  action_log.mark_succeeded!(
    extra_metadata: { provider: run[:provider].key, provider_name: run[:provider].display_name },
    log_text: "AI analysis completed via #{run[:provider].display_name}"
  )
rescue StandardError => e
  # `account`/`action_log` may be unassigned if the initial finds raised.
  Turbo::StreamsChannel.broadcast_append_to(
    account,
    target: "notifications",
    partial: "shared/notification",
    locals: { kind: "alert", message: "AI analysis failed: #{e.message}" }
  ) if account
  action_log&.mark_failed!(error_message: e.message, extra_metadata: { active_job_id: job_id })

  raise
end
-
-
private
-
-
# Reuses the action log referenced by +profile_action_log_id+ when it
# still exists; otherwise creates a fresh queued log entry attributed to
# this job run.
def find_or_create_action_log(account:, profile:, action:, profile_action_log_id:)
  log = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id) if profile_action_log_id.present?
  return log if log

  profile.instagram_profile_action_logs.create!(
    instagram_account: account,
    action: action,
    status: "queued",
    trigger_source: "job",
    occurred_at: Time.current,
    active_job_id: job_id,
    queue_name: queue_name,
    metadata: { created_by: self.class.name }
  )
end
-
-
# Assembles the JSON-ready payload for a profile-level AI analysis run.
#
# Combines profile attributes, the last 20 outgoing DMs, the last 100
# activity events, captured post data (with up to 10 comments each), prior
# image-description results, the accepted-image context (with an explicit
# usage policy passed to the model), and the historical narrative.
#
# @param profile [InstagramProfile]
# @param collected_posts [Enumerable, nil] posts captured during this run
# @param described_posts [Enumerable, nil] posts that already carry analysis
# @param accepted_media_context [Hash] output of build_accepted_media_context
# @return [Hash]
def build_profile_payload(profile:, collected_posts:, described_posts:, accepted_media_context:)
  history_narrative = profile.history_narrative_text(max_chunks: 4)
  history_chunks = profile.history_narrative_chunks(max_chunks: 8)

  # Outgoing messages only, newest first; timestamps serialized as ISO-8601.
  recent_messages =
    profile.instagram_messages
           .where(direction: "outgoing")
           .order(created_at: :desc)
           .limit(20)
           .pluck(:body, :created_at, :sent_at, :status)
           .map do |body, created_at, sent_at, status|
             {
               body: body,
               created_at: created_at&.iso8601,
               sent_at: sent_at&.iso8601,
               status: status
             }
           end

  # Recent activity events, newest first (id breaks detected_at ties).
  recent_events =
    profile.instagram_profile_events
           .order(detected_at: :desc, id: :desc)
           .limit(100)
           .pluck(:kind, :external_id, :occurred_at, :detected_at)
           .map do |kind, external_id, occurred_at, detected_at|
             {
               kind: kind,
               external_id: external_id,
               occurred_at: occurred_at&.iso8601,
               detected_at: detected_at&.iso8601
             }
           end

  {
    username: profile.username,
    ig_user_id: profile.ig_user_id,
    display_name: profile.display_name,
    bio: profile.bio,
    following: profile.following,
    follows_you: profile.follows_you,
    can_message: profile.can_message,
    restriction_reason: profile.restriction_reason,
    last_active_at: profile.last_active_at&.iso8601,
    last_story_seen_at: profile.last_story_seen_at&.iso8601,
    last_post_at: profile.last_post_at&.iso8601,
    recent_outgoing_messages: recent_messages,
    recent_activity_events: recent_events,
    captured_profile_posts: Array(collected_posts).map do |post|
      {
        shortcode: post.shortcode,
        taken_at: post.taken_at&.iso8601,
        caption: post.caption,
        permalink: post.permalink_url,
        comments: post.instagram_profile_post_comments.recent_first.limit(10).map do |c|
          {
            author_username: c.author_username,
            body: c.body,
            commented_at: c.commented_at&.iso8601
          }
        end
      }
    end,
    # Prior per-post AI output, trimmed (10 topics / 5 suggestions) to keep
    # the payload bounded.
    captured_profile_image_descriptions: Array(described_posts).map do |post|
      analysis = post.analysis.is_a?(Hash) ? post.analysis : {}
      {
        shortcode: post.shortcode,
        taken_at: post.taken_at&.iso8601,
        caption: post.caption,
        image_description: analysis["image_description"].to_s.presence,
        topics: Array(analysis["topics"]).first(10),
        comment_suggestions: Array(analysis["comment_suggestions"]).first(5)
      }
    end,
    # Only "accepted" images may feed demographic inference; the policy
    # string below is handed to the model verbatim.
    accepted_image_inputs: {
      policy: "Only accepted images are used for combined demographic insights. Exclude deleted posts and skipped/duplicate story artifacts.",
      accepted_profile_posts: accepted_media_context[:accepted_profile_posts],
      accepted_story_images: accepted_media_context[:accepted_story_images],
      prompt_inputs: {
        combined_insights_required: [ "age_range", "gender_indicators", "location_signals" ],
        instruction: "Aggregate demographic evidence from accepted profile posts and accepted story images."
      }
    },
    historical_narrative_text: history_narrative,
    historical_narrative_chunks: history_chunks
  }
end
-
-
# Builds the inline image inputs for the AI run: the profile avatar (or the
# remote profile-pic URL as a fallback) plus up to MAX_AI_IMAGE_COUNT
# attached post images. Each inline entry carries a base64 data URL and the
# raw bytes. Blobs that are missing, non-image, empty, oversized, or that
# fail to download are skipped.
#
# Fix: previously each inlined blob was downloaded twice — once inside
# encode_blob_to_data_url and once more for the `bytes:` field. Both fields
# are now built from a single download.
def build_media_inputs(profile:, collected_posts:)
  media = []

  if profile.avatar.attached?
    entry = inline_image_entry(profile.avatar.blob)
    media << entry if entry
  elsif profile.profile_pic_url.to_s.strip.present?
    # No stored avatar: pass the remote URL through without downloading.
    media << { type: "image", url: profile.profile_pic_url.to_s.strip }
  end

  Array(collected_posts).first(MAX_AI_IMAGE_COUNT).each do |post|
    next unless post.media.attached?

    blob = post.media.blob
    next unless blob&.content_type.to_s.start_with?("image/")
    next if blob.byte_size.to_i <= 0

    entry = inline_image_entry(blob)
    media << entry if entry
  end

  media
end

# Downloads the blob exactly once and returns an inline media hash, or nil
# when the blob is missing, not an image, above the inline size limit, or
# errors out during download/encoding.
def inline_image_entry(blob)
  return nil unless blob
  return nil unless blob.content_type.to_s.start_with?("image/")
  return nil if blob.byte_size.to_i > MAX_INLINE_IMAGE_BYTES

  bytes = blob.download
  {
    type: "image",
    url: "data:#{blob.content_type};base64,#{Base64.strict_encode64(bytes)}",
    bytes: bytes
  }
rescue StandardError
  nil
end
-
-
# Runs AI image description over the first few media-attached posts and
# persists each result on the post row (status, provider/model, analysis,
# plus a trimmed input snapshot in metadata), then triggers face recognition
# and auto-tag sync. Per-post failures are swallowed so one bad post cannot
# abort the batch. Returns the selected posts regardless of per-post outcome.
def enrich_first_profile_images!(account:, profile:, collected_posts:)
  selected = Array(collected_posts).select { |p| p.media.attached? }.first(MAX_PROFILE_IMAGE_DESCRIPTION_COUNT)

  selected.each do |post|
    analysis_data = run_post_image_description!(account: account, profile: profile, post: post)
    next unless analysis_data.is_a?(Hash)

    post.update!(
      ai_status: "analyzed",
      analyzed_at: Time.current,
      ai_provider: analysis_data["provider"],
      ai_model: analysis_data["model"],
      analysis: analysis_data["analysis"],
      # Keep a bounded snapshot (10 topics / 5 suggestions) of what was
      # analyzed, for later aggregation and debugging.
      metadata: (post.metadata || {}).merge(
        "analysis_input" => {
          "shortcode" => post.shortcode,
          "taken_at" => post.taken_at&.iso8601,
          "caption" => post.caption.to_s,
          "image_description" => analysis_data.dig("analysis", "image_description"),
          "topics" => Array(analysis_data.dig("analysis", "topics")).first(10),
          "comment_suggestions" => Array(analysis_data.dig("analysis", "comment_suggestions")).first(5)
        }
      )
    )
    PostFaceRecognitionService.new.process!(post: post)
    Ai::ProfileAutoTagger.sync_from_post_analysis!(profile: profile, analysis: analysis_data["analysis"])
  # Best-effort per post: any error (AI, DB write, face recognition,
  # tagging) silently moves on to the next post.
  rescue StandardError
    next
  end

  selected
end
-
-
# Runs one post through the AI analyzer (purpose: "post") with the post's
# content (up to 25 recent comments), author-profile context, style rules,
# and the profile's historical narrative. Returns a string-keyed hash with
# "provider", "model", and "analysis". Raises whatever Ai::Runner raises —
# the caller (enrich_first_profile_images!) handles failures per post.
def run_post_image_description!(account:, profile:, post:)
  history_narrative = profile.history_narrative_text(max_chunks: 3)
  history_chunks = profile.history_narrative_chunks(max_chunks: 6)

  payload = {
    post: {
      shortcode: post.shortcode,
      caption: post.caption,
      taken_at: post.taken_at&.iso8601,
      permalink: post.permalink_url,
      likes_count: post.likes_count,
      comments_count: post.comments_count,
      comments: post.instagram_profile_post_comments.recent_first.limit(25).map do |c|
        {
          author_username: c.author_username,
          body: c.body,
          commented_at: c.commented_at&.iso8601
        }
      end
    },
    author_profile: {
      username: profile.username,
      display_name: profile.display_name,
      bio: profile.bio,
      can_message: profile.can_message,
      tags: profile.profile_tags.pluck(:name).sort
    },
    rules: {
      require_manual_review: true,
      style: "gen_z_light",
      historical_narrative_text: history_narrative,
      historical_narrative_chunks: history_chunks
    }
  }

  # media_fingerprint lets the runner dedupe repeat analyses of the same
  # image — see media_fingerprint_for.
  run = Ai::Runner.new(account: account).analyze!(
    purpose: "post",
    analyzable: post,
    payload: payload,
    media: build_post_media_payload(post),
    media_fingerprint: media_fingerprint_for(post)
  )

  # String keys on purpose: this hash is merged into JSON-backed columns.
  {
    "provider" => run[:provider].key,
    "model" => run.dig(:result, :model),
    "analysis" => run.dig(:result, :analysis)
  }
end
-
-
# Prepares the media payload for a single post's AI analysis.
# Returns { type: "none" } when nothing usable exists, a URL reference when
# the image exceeds the inline byte budget, or an inline entry carrying raw
# bytes plus a base64 data URL. Any error degrades to { type: "none" }.
def build_post_media_payload(post)
  return { type: "none" } unless post.media.attached?

  blob = post.media.blob
  return { type: "none" } unless blob&.content_type.to_s.start_with?("image/")

  # Oversized images are passed by reference instead of being inlined.
  if blob.byte_size.to_i > MAX_INLINE_IMAGE_BYTES
    return { type: "image", content_type: blob.content_type, url: post.source_media_url.to_s }
  end

  raw_bytes = blob.download
  data_url = "data:#{blob.content_type};base64,#{Base64.strict_encode64(raw_bytes)}"

  {
    type: "image",
    content_type: blob.content_type,
    bytes: raw_bytes,
    image_data_url: data_url
  }
rescue StandardError
  { type: "none" }
end
-
-
# Best-effort stable fingerprint for a post's media, used to dedupe AI runs.
# Preference order: stored URL fingerprint, then the attachment checksum
# (prefixed "blob:"), then a SHA-256 of the source URL; nil when nothing
# identifies the media.
def media_fingerprint_for(post)
  stored = post.media_url_fingerprint.to_s
  return stored if stored.present?

  if post.media.attached?
    checksum = post.media.blob&.checksum.to_s
    return "blob:#{checksum}" if checksum.present?
  end

  source_url = post.source_media_url.to_s
  source_url.present? ? Digest::SHA256.hexdigest(source_url) : nil
end
-
-
# Encodes an attachment blob as a base64 data URL. Returns nil for missing
# blobs, non-image content types, blobs above the inline size limit, or any
# download/encoding error.
def encode_blob_to_data_url(blob)
  eligible = blob &&
             blob.content_type.to_s.start_with?("image/") &&
             blob.byte_size.to_i <= MAX_INLINE_IMAGE_BYTES
  return nil unless eligible

  "data:#{blob.content_type};base64,#{Base64.strict_encode64(blob.download)}"
rescue StandardError
  nil
end
-
-
# Writes demographic estimates from a profile-level analysis onto the
# profile. Per field the resolution order is: "demographic_estimates",
# then "self_declared", then keyword heuristics over bio/summary text.
# When the model supplies no confidence, low defaults are applied
# (age 0.35, gender 0.3, location 0.25). Best-effort: any error is
# swallowed and the profile is left unchanged.
def update_profile_demographics_from_analysis!(profile:, analysis:)
  return unless analysis.is_a?(Hash)

  demo = analysis["demographic_estimates"]
  # Fall back to the self-declared hash when there is no estimates hash.
  demo = analysis["self_declared"] if !demo.is_a?(Hash) && analysis["self_declared"].is_a?(Hash)
  demo = {} unless demo.is_a?(Hash)

  attrs = {
    ai_persona_summary: analysis["summary"].to_s.presence || profile.ai_persona_summary,
    ai_last_analyzed_at: Time.current
  }

  age = integer_or_nil(demo["age"])
  age ||= integer_or_nil(analysis.dig("self_declared", "age"))
  age ||= inferred_age_from_text(profile: profile, analysis: analysis)
  attrs[:ai_estimated_age] = age if age.present?

  gender = demo["gender"].to_s.strip
  gender = analysis.dig("self_declared", "gender").to_s.strip if gender.blank?
  gender = inferred_gender_from_text(profile: profile, analysis: analysis) if gender.blank?
  attrs[:ai_estimated_gender] = gender if gender.present?

  location = demo["location"].to_s.strip
  location = analysis.dig("self_declared", "location").to_s.strip if location.blank?
  location = inferred_location_from_text(profile: profile, analysis: analysis) if location.blank?
  attrs[:ai_estimated_location] = location if location.present?

  # Model-provided confidences win; otherwise apply conservative defaults
  # only when a value was actually set.
  attrs[:ai_age_confidence] = float_or_nil(demo["age_confidence"]) || (age.present? ? 0.35 : nil)
  attrs[:ai_gender_confidence] = float_or_nil(demo["gender_confidence"]) || (gender.present? ? 0.3 : nil)
  attrs[:ai_location_confidence] = float_or_nil(demo["location_confidence"]) || (location.present? ? 0.25 : nil)

  profile.update!(attrs)
rescue StandardError
  # Demographics are advisory; never let a write failure break the caller.
  nil
end
-
-
# Re-aggregates demographics from all accumulated analysis JSON: builds the
# dataset, runs the aggregator, then persists results at the profile,
# profile-post, and feed-post levels, appends combined insights to the
# persona summary, and records a "demographics_aggregated" audit event.
# Entirely best-effort — any failure anywhere is swallowed.
def aggregate_demographics_from_accumulated_json!(account:, profile:, latest_profile_analysis:, accepted_media_context:)
  dataset = build_demographics_dataset(
    profile: profile,
    latest_profile_analysis: latest_profile_analysis,
    accepted_media_context: accepted_media_context
  )
  aggregated = Ai::ProfileDemographicsAggregator.new(account: account).aggregate!(dataset: dataset)
  # Only a well-formed, explicitly successful aggregator response is used.
  return unless aggregated.is_a?(Hash) && aggregated[:ok] == true

  profile_inference = aggregated[:profile_inference].is_a?(Hash) ? aggregated[:profile_inference] : {}
  post_inferences = Array(aggregated[:post_inferences]).select { |entry| entry.is_a?(Hash) }
  combined_insights = build_combined_prompt_insights(profile_inference: profile_inference, post_inferences: post_inferences, dataset: dataset)

  persist_profile_demographic_inference!(
    profile: profile,
    profile_inference: profile_inference,
    source: aggregated[:source].to_s,
    error: aggregated[:error].to_s.presence
  )
  persist_profile_post_demographic_inferences!(
    profile: profile,
    profile_inference: profile_inference,
    post_inferences: post_inferences,
    source: aggregated[:source].to_s
  )
  persist_feed_post_demographic_inferences!(
    profile: profile,
    profile_inference: profile_inference,
    post_inferences: post_inferences,
    source: aggregated[:source].to_s
  )
  persist_combined_prompt_insights!(profile: profile, combined_insights: combined_insights)

  # Audit trail: microsecond timestamp in external_id keeps events unique.
  profile.record_event!(
    kind: "demographics_aggregated",
    external_id: "demographics_aggregated:#{Time.current.utc.iso8601(6)}",
    occurred_at: Time.current,
    metadata: {
      source: aggregated[:source].to_s,
      profile_inference: profile_inference,
      post_inferences_count: post_inferences.length,
      profile_dataset_rows: dataset.dig(:analysis_pool, :profile_rows_count),
      post_dataset_rows: dataset.dig(:analysis_pool, :post_rows_count),
      accepted_profile_images: dataset.dig(:analysis_pool, :accepted_profile_images_count),
      accepted_story_images: dataset.dig(:analysis_pool, :accepted_story_images_count),
      combined_prompt_insights: combined_insights,
      aggregator_error: aggregated[:error].to_s.presence
    }
  )
rescue StandardError
  # Aggregation is a background enrichment; swallow everything.
  nil
end
-
-
# Collects every demographic signal available for the profile into one
# dataset hash for the aggregator: the latest profile analysis, prior
# profile runs, stored profile insights, accepted profile-post analyses,
# feed-post analyses, and accepted story rows — plus the currently stored
# estimates for context.
def build_demographics_dataset(profile:, latest_profile_analysis:, accepted_media_context:)
  profile_runs = profile.ai_analyses.where(purpose: "profile", status: "succeeded").recent_first.limit(30)
  # Over-fetch (220) because acceptance is filtered in Ruby; keep at most 120.
  profile_post_runs = profile.instagram_profile_posts.where.not(analysis: nil).recent_first.limit(220).select { |post| accepted_profile_post?(post) }.first(120)
  feed_post_runs = profile.instagram_account.instagram_posts.where(instagram_profile_id: profile.id).where.not(analysis: nil).recent_first.limit(120)
  story_rows = accepted_story_demographic_rows(profile: profile)

  profile_demographics = []

  # NOTE(review): unlike the rows below, the latest analysis is appended
  # without a present? check, so an empty {} row can be included here.
  if latest_profile_analysis.is_a?(Hash)
    profile_demographics << extract_demographics_from_analysis(latest_profile_analysis)
  end

  profile_runs.each do |row|
    extracted = extract_demographics_from_analysis(row.analysis)
    profile_demographics << extracted if extracted.present?
  end

  profile_insight_rows = profile.instagram_profile_insights.order(created_at: :desc).limit(20)
  profile_insight_rows.each do |insight|
    analysis = insight.raw_analysis
    extracted = extract_demographics_from_analysis(analysis)
    profile_demographics << extracted if extracted.present?
  end

  post_demographics = []

  # Each post row is tagged with its shortcode and origin table.
  profile_post_runs.each do |post|
    extracted = extract_demographics_from_analysis(post.analysis)
    next if extracted.blank?

    post_demographics << extracted.merge(shortcode: post.shortcode, source: "instagram_profile_posts")
  end

  feed_post_runs.each do |post|
    extracted = extract_demographics_from_analysis(post.analysis)
    next if extracted.blank?

    post_demographics << extracted.merge(shortcode: post.shortcode, source: "instagram_posts")
  end

  # Story rows arrive pre-tagged from accepted_story_demographic_rows.
  story_rows.each do |story_row|
    post_demographics << story_row
  end

  {
    profile: {
      username: profile.username,
      display_name: profile.display_name,
      bio: profile.bio,
      current_demographics: {
        age: profile.ai_estimated_age,
        age_confidence: profile.ai_age_confidence,
        gender: profile.ai_estimated_gender,
        gender_confidence: profile.ai_gender_confidence,
        location: profile.ai_estimated_location,
        location_confidence: profile.ai_location_confidence
      }
    },
    analysis_pool: {
      profile_demographics: profile_demographics,
      post_demographics: post_demographics,
      profile_rows_count: profile_demographics.length,
      post_rows_count: post_demographics.length,
      accepted_profile_images_count: accepted_media_context[:accepted_profile_posts_count].to_i,
      accepted_story_images_count: accepted_media_context[:accepted_story_images_count].to_i
    }
  }
end
-
-
# Builds the "accepted images" context used both in the AI payload and the
# demographics dataset: up to 40 accepted profile posts and up to 40
# accepted story images (each filtered from a 220-row scan window), plus
# their counts. Acceptance rules live in accepted_profile_post? and
# accepted_story_event?.
def build_accepted_media_context(profile:)
  accepted_profile_posts =
    profile.instagram_profile_posts
           .recent_first
           .limit(220)
           .select { |post| accepted_profile_post?(post) }
           .first(40)
           .map do |post|
             analysis = post.analysis.is_a?(Hash) ? post.analysis : {}
             {
               source_ref: post.shortcode,
               source_type: "instagram_profile_post",
               # Caption flattened to one line and byte-capped at 260.
               taken_at: post.taken_at&.iso8601,
               caption: post.caption.to_s.tr("\n", " ").byteslice(0, 260),
               image_description: analysis["image_description"].to_s.presence,
               relevant: analysis["relevant"],
               inferred_demographics: analysis["inferred_demographics"].is_a?(Hash) ? analysis["inferred_demographics"] : nil
             }
           end

  accepted_story_images =
    profile.instagram_profile_events
           .where(kind: InstagramProfileEvent::STORY_ARCHIVE_EVENT_KINDS)
           .with_attached_media
           .order(detected_at: :desc, id: :desc)
           .limit(220)
           .select { |event| accepted_story_event?(event) }
           .first(40)
           .map do |event|
             meta = event.metadata.is_a?(Hash) ? event.metadata : {}
             intel = meta["local_story_intelligence"].is_a?(Hash) ? meta["local_story_intelligence"] : {}
             {
               # Prefer the story id; fall back to the event's external id.
               source_ref: meta["story_id"].to_s.presence || event.external_id.to_s,
               source_type: "instagram_story",
               taken_at: event.occurred_at&.iso8601 || event.detected_at&.iso8601,
               image_description: meta["ai_image_description"].to_s.presence,
               # Signals prefer the local intelligence hash over raw metadata.
               ocr_text: intel["ocr_text"].to_s.presence || meta["ocr_text"].to_s.presence,
               hashtags: Array(intel["hashtags"] || meta["hashtags"]).first(8),
               mentions: Array(intel["mentions"] || meta["mentions"]).first(8),
               objects: Array(intel["objects"] || meta["content_signals"]).first(10),
               relevant: true
             }
           end

  {
    accepted_profile_posts: accepted_profile_posts,
    accepted_story_images: accepted_story_images,
    accepted_profile_posts_count: accepted_profile_posts.length,
    accepted_story_images_count: accepted_story_images.length
  }
end
-
-
# Extracts demographic hint rows from the most recent accepted story events
# (at most 120 rows out of a 220-event scan window). Each row is tagged
# with its story reference and marked relevant.
def accepted_story_demographic_rows(profile:)
  recent_story_events = profile.instagram_profile_events
                               .where(kind: InstagramProfileEvent::STORY_ARCHIVE_EVENT_KINDS)
                               .with_attached_media
                               .order(detected_at: :desc, id: :desc)
                               .limit(220)

  rows = []
  recent_story_events.each do |event|
    break if rows.length >= 120
    next unless accepted_story_event?(event)

    meta = event.metadata.is_a?(Hash) ? event.metadata : {}
    extracted = extract_demographics_from_story_metadata(metadata: meta)
    next if extracted.blank?

    story_ref = meta["story_id"].to_s.presence || event.external_id.to_s
    rows << extracted.merge(shortcode: story_ref, source: "instagram_stories", relevant: true)
  end

  rows
end
-
-
# A profile post participates in demographic aggregation only when it has a
# hash analysis, was not flagged as deleted at the source, was not judged
# irrelevant, and still has media (an attachment or a source URL).
def accepted_profile_post?(post)
  return false unless post
  return false unless post.analysis.is_a?(Hash)

  meta = post.metadata.is_a?(Hash) ? post.metadata : {}
  deleted = ActiveModel::Type::Boolean.new.cast(meta["deleted_from_source"])
  return false if deleted

  # Only an explicit `relevant: false` excludes the post; nil passes.
  return false if post.analysis["relevant"] == false

  post.media.attached? || post.source_media_url.to_s.present?
end
-
-
# A story event is usable for demographics when its media is still attached,
# it was not marked skipped at capture time, and at least one intelligence
# signal exists (local intelligence hash, AI description, OCR text, content
# signals, hashtags, or mentions).
def accepted_story_event?(event)
  return false unless event
  return false unless event.media.attached?

  meta = event.metadata.is_a?(Hash) ? event.metadata : {}
  return false if ActiveModel::Type::Boolean.new.cast(meta["skipped"])

  intel = meta["local_story_intelligence"]
  return true if intel.is_a?(Hash) && intel.present?

  meta["ai_image_description"].to_s.present? ||
    meta["ocr_text"].to_s.present? ||
    [meta["content_signals"], meta["hashtags"], meta["mentions"]].any? { |list| Array(list).any? }
end
-
-
# Heuristic demographic extraction from story metadata text (AI description,
# OCR, transcript, hashtags, mentions, location tags). Age comes from an
# "NN yo / yrs / years old" pattern (10-79), gender from pronoun/keyword
# matches, location from location tags or a "based in/from/in <place>"
# phrase. Matches carry fixed low confidences (age 0.28, gender 0.26,
# location 0.24). Returns {} when no demographic value was found.
def extract_demographics_from_story_metadata(metadata:)
  raw = metadata.is_a?(Hash) ? metadata : {}
  intel = raw["local_story_intelligence"].is_a?(Hash) ? raw["local_story_intelligence"] : {}
  location_tags = Array(intel["location_tags"] || raw["location_tags"]).map(&:to_s).reject(&:blank?)

  # Pool every text signal; intelligence-hash values take priority via ||.
  text_parts = [
    raw["ai_image_description"].to_s,
    intel["ocr_text"].to_s,
    raw["ocr_text"].to_s,
    intel["transcript"].to_s,
    Array(intel["hashtags"] || raw["hashtags"]).join(" "),
    Array(intel["mentions"] || raw["mentions"]).join(" "),
    location_tags.join(" ")
  ].map(&:strip).reject(&:blank?)

  text = text_parts.join(" ").downcase
  # Two-digit age 10-79 followed by an age marker ("24 yo", "24 yrs", …).
  age =
    if (m = text.match(/\b([1-7]\d)\s?(?:yo|yrs?|years?\s*old)\b/))
      m[1].to_i
    end
  gender =
    if text.match?(/\b(she\/her|she her|woman|girl|mrs|ms)\b/)
      "female"
    elsif text.match?(/\b(he\/him|he him|man|boy|mr)\b/)
      "male"
    elsif text.match?(/\b(they\/them|non[- ]?binary)\b/)
      "non-binary"
    end
  # Explicit location tags beat free-text phrase matching.
  location = location_tags.first.to_s.presence
  if location.blank? && (m = text.match(/(?:based in|from|in)\s+([a-z][a-z\s,.-]{2,40})/))
    location = m[1].to_s.split(/[|•]/).first.to_s.strip.titleize
  end

  # Short evidence string for audit (first three signals, byte-capped).
  evidence = text_parts.first(3).join(" | ").byteslice(0, 220)
  out = {
    age: age,
    age_confidence: age.present? ? 0.28 : nil,
    gender: normalize_unknown_string(gender),
    gender_confidence: gender.present? ? 0.26 : nil,
    location: normalize_unknown_string(location),
    location_confidence: location.present? ? 0.24 : nil,
    evidence: evidence.presence
  }.compact

  # Evidence alone is not a demographic signal — require a real value.
  demo_values = [out[:age], out[:gender], out[:location]].compact
  return {} if demo_values.empty?

  out
end
-
-
# Condenses pooled demographic rows into prompt-ready insight fields: an
# age range over all pooled ages, the top-3 gender indicators and top-5
# location signals by frequency, plus accepted image counts.
def build_combined_prompt_insights(profile_inference:, post_inferences:, dataset:)
  rows = Array(dataset.dig(:analysis_pool, :post_demographics))
  hints = Array(post_inferences)

  ages = rows.filter_map { |row| integer_or_nil(row[:age] || row["age"]) }
  profile_age = integer_or_nil(profile_inference[:age])
  ages << profile_age if profile_age
  age_range = ("#{ages.min}-#{ages.max}" unless ages.empty?)

  # Rank values by how often they occur, keeping the most frequent few.
  rank = lambda do |values, keep|
    values.compact.tally.sort_by { |_value, count| -count }.first(keep).map(&:first)
  end

  gender_values = rows.map { |row| normalize_unknown_string(row[:gender] || row["gender"]) }
  gender_values << normalize_unknown_string(profile_inference[:gender])
  gender_values.concat(hints.map { |row| normalize_unknown_string(row[:gender] || row["gender"]) })

  location_values = rows.map { |row| normalize_unknown_string(row[:location] || row["location"]) }
  location_values << normalize_unknown_string(profile_inference[:location])
  location_values.concat(hints.map { |row| normalize_unknown_string(row[:location] || row["location"]) })

  {
    age_range: age_range,
    gender_indicators: rank.call(gender_values, 3),
    location_signals: rank.call(location_values, 5),
    accepted_profile_images_count: dataset.dig(:analysis_pool, :accepted_profile_images_count).to_i,
    accepted_story_images_count: dataset.dig(:analysis_pool, :accepted_story_images_count).to_i
  }.compact
end
-
-
# Appends a single "Combined insights" summary line to the profile's persona
# summary. Skipped when every substantive field (everything except the image
# counts) is blank. Best-effort: write failures are swallowed.
# NOTE(review): this appends a new line on every aggregation run, so
# ai_persona_summary grows without bound — consider de-duplicating or
# replacing the previous insights line.
def persist_combined_prompt_insights!(profile:, combined_insights:)
  return unless combined_insights.is_a?(Hash)
  return if combined_insights.except(:accepted_profile_images_count, :accepted_story_images_count).values.all?(&:blank?)

  line = [
    "Combined insights",
    "age_range=#{combined_insights[:age_range]}",
    "gender_indicators=#{Array(combined_insights[:gender_indicators]).join(', ')}",
    "location_signals=#{Array(combined_insights[:location_signals]).join(', ')}",
    "accepted_posts=#{combined_insights[:accepted_profile_images_count].to_i}",
    "accepted_stories=#{combined_insights[:accepted_story_images_count].to_i}"
  ].join(" | ")

  profile.update!(
    ai_persona_summary: [ profile.ai_persona_summary.to_s.presence, line ].compact.join("\n")
  )
rescue StandardError
  nil
end
-
-
# Normalizes one analysis hash into a flat demographics row. Each value is
# resolved in priority order: model estimates, then self-declared values,
# then inferred demographics. Returns {} for non-hash input; nil fields are
# compacted away.
def extract_demographics_from_analysis(analysis)
  return {} unless analysis.is_a?(Hash)

  demo, declared, inferred = %w[demographic_estimates self_declared inferred_demographics].map do |key|
    section = analysis[key]
    section.is_a?(Hash) ? section : {}
  end

  ordered_sections = [demo, declared, inferred]
  age = ordered_sections.lazy.map { |section| integer_or_nil(section["age"]) }.find(&:itself)
  gender = ordered_sections.lazy.map { |section| section["gender"].to_s.strip.presence }.find(&:itself)
  location = ordered_sections.lazy.map { |section| section["location"].to_s.strip.presence }.find(&:itself)

  {
    age: age,
    age_confidence: float_or_nil(demo["age_confidence"]) || float_or_nil(inferred["age_confidence"]),
    gender: normalize_unknown_string(gender),
    gender_confidence: float_or_nil(demo["gender_confidence"]) || float_or_nil(inferred["gender_confidence"]),
    location: normalize_unknown_string(location),
    location_confidence: float_or_nil(demo["location_confidence"]) || float_or_nil(inferred["location_confidence"]),
    evidence: analysis["evidence"].to_s.presence || demo["evidence"].to_s.presence
  }.compact
end
-
-
# Applies the aggregator's profile-level inference to the stored estimates.
# Each field is replaced only when should_replace_value? agrees (empty or
# clearly lower-confidence current value). Evidence text is appended to the
# persona summary. Skips the write entirely when nothing beyond the
# timestamp would change.
# NOTE(review): the `source:` keyword is accepted but never used here.
def persist_profile_demographic_inference!(profile:, profile_inference:, source:, error:)
  attrs = { ai_last_analyzed_at: Time.current }

  maybe_age = integer_or_nil(profile_inference[:age])
  maybe_age_conf = float_or_nil(profile_inference[:age_confidence])
  if should_replace_value?(current: profile.ai_estimated_age, candidate: maybe_age, current_confidence: profile.ai_age_confidence, candidate_confidence: maybe_age_conf)
    attrs[:ai_estimated_age] = maybe_age
    attrs[:ai_age_confidence] = maybe_age_conf if maybe_age_conf
  end

  maybe_gender = normalize_unknown_string(profile_inference[:gender])
  maybe_gender_conf = float_or_nil(profile_inference[:gender_confidence])
  if should_replace_value?(current: normalize_unknown_string(profile.ai_estimated_gender), candidate: maybe_gender, current_confidence: profile.ai_gender_confidence, candidate_confidence: maybe_gender_conf)
    attrs[:ai_estimated_gender] = maybe_gender
    attrs[:ai_gender_confidence] = maybe_gender_conf if maybe_gender_conf
  end

  maybe_location = normalize_unknown_string(profile_inference[:location])
  maybe_location_conf = float_or_nil(profile_inference[:location_confidence])
  if should_replace_value?(current: normalize_unknown_string(profile.ai_estimated_location), candidate: maybe_location, current_confidence: profile.ai_location_confidence, candidate_confidence: maybe_location_conf)
    attrs[:ai_estimated_location] = maybe_location
    attrs[:ai_location_confidence] = maybe_location_conf if maybe_location_conf
  end

  # Evidence / reasoning / aggregator error are folded into one line and
  # appended to the persona summary.
  evidence = [ profile_inference[:evidence].to_s, profile_inference[:why].to_s, error.to_s ].reject(&:blank?).join(" | ")
  if evidence.present?
    attrs[:ai_persona_summary] = [ profile.ai_persona_summary.to_s.presence, evidence ].compact.join("\n")
  end

  # attrs always contains the timestamp; only write when something else
  # (estimate, confidence, or summary) was added.
  profile.update!(attrs) if attrs.keys.length > 1 || attrs[:ai_persona_summary].present?
end
-
-
# Backfills demographic hints onto accepted profile posts (the most recent
# 220), matching per-post aggregator output by shortcode; posts without a
# matching hint fall back to the profile-level inference inside
# enrich_post_demographics!.
def persist_profile_post_demographic_inferences!(profile:, profile_inference:, post_inferences:, source:)
  hints_by_shortcode = post_inferences.index_by { |entry| entry[:shortcode].to_s }

  profile.instagram_profile_posts.recent_first.limit(220).each do |post|
    next unless accepted_profile_post?(post)

    enrich_post_demographics!(
      record: post,
      profile_inference: profile_inference,
      post_hint: hints_by_shortcode[post.shortcode.to_s],
      source: source
    )
  end
end
-
-
# Same backfill as the profile-post variant, but over the account's feed
# posts linked to this profile (most recent 150). Posts explicitly marked
# irrelevant are skipped; everything else is enriched.
def persist_feed_post_demographic_inferences!(profile:, profile_inference:, post_inferences:, source:)
  hints_by_shortcode = post_inferences.index_by { |entry| entry[:shortcode].to_s }
  feed_posts = profile.instagram_account.instagram_posts.where(instagram_profile_id: profile.id).recent_first.limit(150)

  feed_posts.each do |post|
    analysis = post.analysis.is_a?(Hash) ? post.analysis : {}
    next if analysis["relevant"] == false

    enrich_post_demographics!(
      record: post,
      profile_inference: profile_inference,
      post_hint: hints_by_shortcode[post.shortcode.to_s],
      source: source
    )
  end
end
-
-
# Merges demographic hints into a post's analysis["inferred_demographics"],
# filling only fields that are currently blank (existing values are never
# overwritten). The post-specific hint wins over the profile inference.
# Writes back only when at least one field changed. Best-effort: any error
# leaves the record untouched.
def enrich_post_demographics!(record:, profile_inference:, post_hint:, source:)
  base = record.analysis.is_a?(Hash) ? record.analysis.deep_dup : {}
  inferred = base["inferred_demographics"].is_a?(Hash) ? base["inferred_demographics"].deep_dup : {}

  # NOTE(review): `||=` means an explicit false relevance in the hint is
  # overridden by a truthy base["relevant"] — confirm that is intended.
  relevant = ActiveModel::Type::Boolean.new.cast(post_hint&.dig(:relevant))
  relevant ||= ActiveModel::Type::Boolean.new.cast(base["relevant"])

  age = integer_or_nil(post_hint&.dig(:age)) || integer_or_nil(profile_inference[:age])
  gender = normalize_unknown_string(post_hint&.dig(:gender)) || normalize_unknown_string(profile_inference[:gender])
  location = normalize_unknown_string(post_hint&.dig(:location)) || normalize_unknown_string(profile_inference[:location])
  # 0.3 is the floor confidence when neither hint nor profile provide one.
  confidence = float_or_nil(post_hint&.dig(:confidence)) || float_or_nil(profile_inference[:age_confidence]) || 0.3

  changed = false
  if inferred["age"].blank? && age.present?
    inferred["age"] = age
    changed = true
  end
  if normalize_unknown_string(inferred["gender"]).blank? && gender.present?
    inferred["gender"] = gender
    changed = true
  end
  if normalize_unknown_string(inferred["location"]).blank? && location.present?
    inferred["location"] = location
    changed = true
  end

  if changed
    # Bookkeeping fields are only refreshed when a demographic field moved.
    inferred["confidence"] = confidence
    inferred["age_confidence"] = float_or_nil(profile_inference[:age_confidence]) if inferred["age_confidence"].blank?
    inferred["gender_confidence"] = float_or_nil(profile_inference[:gender_confidence]) if inferred["gender_confidence"].blank?
    inferred["location_confidence"] = float_or_nil(profile_inference[:location_confidence]) if inferred["location_confidence"].blank?
    inferred["relevant"] = relevant
    inferred["source"] = source.to_s.presence || "json_aggregator"
    inferred["updated_at"] = Time.current.utc.iso8601(3)
    inferred["evidence"] = post_hint&.dig(:evidence).to_s.presence || profile_inference[:evidence].to_s.presence

    base["inferred_demographics"] = inferred
    record.update!(analysis: base)
  end
rescue StandardError
  # Enrichment is advisory; never propagate failures to the caller.
  nil
end
-
-
# Decides whether a freshly inferred demographic value should overwrite the
# stored one: never without a candidate, always when nothing usable is
# stored, otherwise only when the candidate's confidence beats the current
# confidence by more than 0.1.
def should_replace_value?(current:, candidate:, current_confidence:, candidate_confidence:)
  return false if candidate.blank?
  return true if current.blank? || normalize_unknown_string(current).blank?

  candidate_score = float_or_nil(candidate_confidence).to_f
  current_score = float_or_nil(current_confidence).to_f
  candidate_score > current_score + 0.1
end
-
-
# Strips the value and maps placeholder strings ("unknown", "n/a", "none",
# "null") and blanks to nil so callers can treat them as missing data.
def normalize_unknown_string(value)
  text = value.to_s.strip
  return nil if text.blank? || %w[unknown n/a none null].include?(text.downcase)

  text
end
-
-
# Coarse age heuristic over bio + analysis summary keywords, checked in
# priority order: student terms => 21, school terms => 17, parent terms
# => 34, else a generic adult default of 26.
def inferred_age_from_text(profile:, analysis:)
  haystack = "#{profile.bio} #{analysis['summary']}".downcase

  case haystack
  when /\b(student|college|university|campus|undergrad)\b/ then 21
  when /\b(high school|school life|class of 20\d{2})\b/ then 17
  when /\b(mom|dad|parent)\b/ then 34
  else 26
  end
end
-
-
# Keyword/pronoun-based gender heuristic over bio + summary text; female
# indicators are checked first, then male, then non-binary; "unknown" when
# nothing matches.
def inferred_gender_from_text(profile:, analysis:)
  haystack = "#{profile.bio} #{analysis['summary']}".downcase

  case haystack
  when /\b(she\/her|she her|woman|girl|mrs|ms)\b/ then "female"
  when /\b(he\/him|he him|man|boy|mr)\b/ then "male"
  when /\b(they\/them|non[- ]?binary)\b/ then "non-binary"
  else "unknown"
  end
end
-
-
# Location heuristic over bio + summary + language names: an explicit
# "📍 / based in / from <place>" phrase wins (titleized, truncated at the
# first separator), then language keywords map to a coarse country guess,
# else "unknown".
def inferred_location_from_text(profile:, analysis:)
  language_names = Array(analysis["languages"]).map { |l| l.is_a?(Hash) ? l["language"] : l }.join(" ")
  haystack = [profile.bio.to_s, analysis["summary"].to_s, language_names].join(" ").downcase

  explicit = haystack.match(/(?:📍|based in|from)\s+([a-z][a-z\s,.-]{2,40})/)
  return explicit[1].to_s.split(/[|•]/).first.to_s.strip.titleize if explicit

  case haystack
  when /\b(english|usa|us)\b/ then "United States"
  when /\b(hindi|india|indian)\b/ then "India"
  else "unknown"
  end
end
-
-
# Strict integer coercion via Kernel#Integer: nil for blank input or
# anything that does not convert cleanly (e.g. "4.2", "abc").
def integer_or_nil(value)
  value.blank? ? nil : Integer(value)
rescue StandardError
  nil
end
-
-
# Strict float coercion via Kernel#Float: nil for blank input or anything
# that does not convert cleanly.
def float_or_nil(value)
  value.blank? ? nil : Float(value)
rescue StandardError
  nil
end
-
end
-
class AnalyzeInstagramProfilePostJob < ApplicationJob
-
# Post-analysis jobs run on the visual-analysis queue.
queue_as :ai_visual_queue

# Reason codes that mark a profile as "not ready yet" for downstream
# processing. Mirrors ProcessPostMetadataTaggingJob's list when that class
# is already loaded; otherwise falls back to a local copy of the same codes.
PROFILE_INCOMPLETE_REASON_CODES =
  if defined?(ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES)
    ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES
  else
    %w[
      latest_posts_not_analyzed
      insufficient_analyzed_posts
      no_recent_posts_available
      missing_structured_post_signals
      profile_preparation_failed
      profile_preparation_error
    ].freeze
  end
# Retry budget for comment generation: env-tunable, clamped to 1..10.
COMMENT_RETRY_MAX_ATTEMPTS = ENV.fetch("POST_COMMENT_RETRY_MAX_ATTEMPTS", 3).to_i.clamp(1, 10)

# Every analysis task defaults to enabled; callers override per task via
# the task_flags argument to #perform.
DEFAULT_TASK_FLAGS = {
  analyze_visual: true,
  analyze_faces: true,
  run_ocr: true,
  run_video: true,
  run_metadata: true,
  generate_comments: true,
  enforce_comment_evidence_policy: true,
  retry_on_incomplete_profile: true
}.freeze
-
-
# Entry point: analyzes one profile post. Applies the scan policy first
# (possibly marking the profile excluded and the post skipped), resolves
# the task flags, then either runs the whole pipeline inline or fans it out
# as an orchestrated set of step jobs. On error the post is marked failed,
# a Turbo alert is broadcast, and the error is re-raised so ActiveJob's
# retry/report machinery sees it.
def perform(
  instagram_account_id:,
  instagram_profile_id:,
  instagram_profile_post_id:,
  task_flags: {},
  pipeline_mode: "async"
)
  account = InstagramAccount.find(instagram_account_id)
  profile = account.instagram_profiles.find(instagram_profile_id)
  post = profile.instagram_profile_posts.find(instagram_profile_post_id)
  policy_decision = Instagram::ProfileScanPolicy.new(profile: profile).decision

  if policy_decision[:skip_post_analysis]
    # Some skip reasons also exclude the whole profile from future scans.
    if policy_decision[:reason_code].to_s == "non_personal_profile_page" || policy_decision[:reason_code].to_s == "scan_excluded_tag"
      Instagram::ProfileScanPolicy.mark_scan_excluded!(profile: profile)
    end

    Instagram::ProfileScanPolicy.mark_post_analysis_skipped!(post: post, decision: policy_decision)
    return
  end

  resolved_flags = resolve_task_flags(post: post, task_flags: task_flags)

  # "inline" runs every step synchronously in this job; anything else
  # fans out to the orchestrated multi-job pipeline.
  if pipeline_mode.to_s == "inline"
    perform_inline(
      account: account,
      profile: profile,
      post: post,
      task_flags: resolved_flags
    )
    return
  end

  start_orchestrated_pipeline!(
    account: account,
    profile: profile,
    post: post,
    task_flags: resolved_flags
  )
rescue StandardError => e
  # defined?/persisted? guards: the error may have happened before the
  # record lookups above completed.
  post&.update!(ai_status: "failed") if defined?(post) && post&.persisted?

  Turbo::StreamsChannel.broadcast_append_to(
    account,
    target: "notifications",
    partial: "shared/notification",
    locals: { kind: "alert", message: "Profile post analysis failed: #{e.message}" }
  ) if defined?(account) && account

  raise
end
-
-
private
-
-
# Starts a tracked pipeline run for the post and fans out one ActiveJob per
# required analysis step, finishing with the finalizer job that collects the
# step results.
def start_orchestrated_pipeline!(account:, profile:, post:, task_flags:)
  pipeline_state = Ai::PostAnalysisPipelineState.new(post: post)
  run_id = pipeline_state.start!(
    task_flags: task_flags,
    source_job: self.class.name
  )
  required_steps = pipeline_state.required_steps(run_id: run_id)

  Ops::StructuredLogger.info(
    event: "ai.pipeline.started",
    payload: {
      active_job_id: job_id,
      instagram_account_id: account.id,
      instagram_profile_id: profile.id,
      instagram_profile_post_id: post.id,
      pipeline_run_id: run_id,
      required_steps: required_steps,
      task_flags: task_flags
    }
  )

  # Fan out the step jobs; enqueue_step_job! is a no-op for steps the run
  # does not require.
  step_jobs = [
    [ "visual", ProcessPostVisualAnalysisJob ],
    [ "face", ProcessPostFaceAnalysisJob ],
    [ "ocr", ProcessPostOcrAnalysisJob ],
    [ "video", ProcessPostVideoAnalysisJob ]
  ]
  step_jobs.each do |step_name, step_job_class|
    enqueue_step_job!(
      step: step_name,
      job_class: step_job_class,
      account: account,
      profile: profile,
      post: post,
      run_id: run_id,
      pipeline_state: pipeline_state
    )
  end

  FinalizePostAnalysisPipelineJob.perform_later(
    instagram_account_id: account.id,
    instagram_profile_id: profile.id,
    instagram_profile_post_id: post.id,
    pipeline_run_id: run_id,
    attempts: 0
  )
end
-
-
# Enqueues one analysis-step job when the pipeline run requires that step,
# and records the enqueue in the pipeline state.
#
# If enqueuing fails, the step is marked completed-with-failure (so the
# finalizer does not wait on it forever) and a warning is logged; the error
# is swallowed so remaining steps can still be scheduled.
def enqueue_step_job!(step:, job_class:, account:, profile:, post:, run_id:, pipeline_state:)
  return unless pipeline_state.required_steps(run_id: run_id).include?(step)

  job = job_class.perform_later(
    instagram_account_id: account.id,
    instagram_profile_id: profile.id,
    instagram_profile_post_id: post.id,
    pipeline_run_id: run_id
  )

  pipeline_state.mark_step_queued!(
    run_id: run_id,
    step: step,
    queue_name: job.queue_name,
    active_job_id: job.job_id,
    result: {
      enqueued_by: self.class.name,
      enqueued_at: Time.current.iso8601(3)
    }
  )

  Ops::StructuredLogger.info(
    event: "ai.pipeline.step_enqueued",
    payload: {
      active_job_id: job_id,
      instagram_account_id: account.id,
      instagram_profile_id: profile.id,
      instagram_profile_post_id: post.id,
      pipeline_run_id: run_id,
      step: step,
      queue_name: job.queue_name,
      enqueued_job_id: job.job_id
    }
  )
rescue StandardError => e
  pipeline_state.mark_step_completed!(
    run_id: run_id,
    step: step,
    status: "failed",
    error: "enqueue_failed: #{e.class}: #{e.message}",
    result: {
      reason: "enqueue_failed"
    }
  )

  Ops::StructuredLogger.warn(
    event: "ai.pipeline.step_enqueue_failed",
    payload: {
      active_job_id: job_id,
      instagram_account_id: account.id,
      instagram_profile_id: profile.id,
      instagram_profile_post_id: post.id,
      pipeline_run_id: run_id,
      step: step,
      error_class: e.class.name,
      # Truncate to keep log payloads bounded.
      error_message: e.message.to_s.byteslice(0, 280)
    }
  )
end
-
-
# Runs the full analysis pipeline synchronously, in order: visual analysis,
# face recognition, metadata tagging, then comment generation. Each step is
# gated by its task flag. Finishes by marking the post analyzed (if no step
# already did) and broadcasting a notification.
def perform_inline(account:, profile:, post:, task_flags:)
  builder = Ai::PostAnalysisContextBuilder.new(profile: profile, post: post)
  run = nil

  if task_flags[:analyze_visual]
    payload = builder.payload
    media = builder.media_payload
    run = Ai::Runner.new(account: account).analyze!(
      purpose: "post",
      analyzable: post,
      payload: payload,
      media: media,
      media_fingerprint: builder.media_fingerprint(media: media),
      provider_options: inline_provider_options(task_flags: task_flags)
    )

    post.update!(
      ai_status: "analyzed",
      analyzed_at: Time.current,
      ai_provider: run[:provider].key,
      ai_model: run.dig(:result, :model),
      analysis: run.dig(:result, :analysis)
    )
  end

  if task_flags[:analyze_faces]
    face_recognition_result = PostFaceRecognitionService.new.process!(post: post)
    merge_face_summary!(post: post, face_recognition_result: face_recognition_result)
  end

  if task_flags[:run_metadata]
    analysis_hash = post.analysis.is_a?(Hash) ? post.analysis : {}
    Ai::ProfileAutoTagger.sync_from_post_analysis!(profile: profile, analysis: analysis_hash)
  end

  comment_result = nil
  if task_flags[:generate_comments]
    comment_result = Ai::PostCommentGenerationService.new(
      account: account,
      profile: profile,
      post: post,
      enforce_required_evidence: ActiveModel::Type::Boolean.new.cast(task_flags[:enforce_comment_evidence_policy])
    ).run!
    # Reload: the comment service mutates post metadata we inspect below.
    post.reload

    # When comment generation was blocked only because the profile history
    # is incomplete, schedule a Build History fallback that resumes later.
    if ActiveModel::Type::Boolean.new.cast(task_flags[:retry_on_incomplete_profile]) &&
       retryable_profile_incomplete_block?(post: post, comment_result: comment_result)
      enqueue_build_history_retry_if_needed!(account: account, profile: profile, post: post)
    end
  end

  post.update!(ai_status: "analyzed", analyzed_at: Time.current) unless post.ai_status.to_s == "analyzed"
  notification_message =
    if comment_result&.dig(:reason_code).to_s == "missing_required_evidence"
      "Profile post analyzed: #{post.shortcode}. Waiting for Build History to finish comment generation."
    else
      "Profile post analyzed: #{post.shortcode}."
    end

  Turbo::StreamsChannel.broadcast_append_to(
    account,
    target: "notifications",
    partial: "shared/notification",
    locals: {
      kind: "notice",
      message: notification_message
    }
  )
end
-
-
# Splices a compact "face_summary" into the post's analysis hash, derived
# from metadata["face_recognition"]. Best-effort: any error is swallowed.
def merge_face_summary!(post:, face_recognition_result:)
  analysis = post.analysis.is_a?(Hash) ? post.analysis.deep_dup : {}
  recognition_meta = post.metadata.is_a?(Hash) ? post.metadata.dig("face_recognition") : nil
  recognition_meta = {} unless recognition_meta.is_a?(Hash)
  people = Array(recognition_meta["matched_people"])
  boolean = ActiveModel::Type::Boolean.new

  summary = {
    "face_count" => recognition_meta["face_count"].to_i,
    "owner_faces_count" => people.count { |row| boolean.cast(row["owner_match"] || row[:owner_match]) },
    "recurring_faces_count" => people.count { |row| boolean.cast(row["recurring_face"] || row[:recurring_face]) },
    "detection_source" => recognition_meta["detection_source"].to_s.presence || face_recognition_result[:reason].to_s.presence,
    "participant_summary" => recognition_meta["participant_summary"].to_s.presence,
    "detection_reason" => recognition_meta["detection_reason"].to_s.presence,
    "detection_error" => recognition_meta["detection_error"].to_s.presence
  }.compact

  analysis["face_summary"] = summary
  post.update!(analysis: analysis)
rescue StandardError
  nil
end
-
-
# Merges caller-provided overrides onto DEFAULT_TASK_FLAGS. Unknown keys are
# ignored; values are cast to booleans. Video analysis is force-disabled
# unless the attached media is actually a video.
def resolve_task_flags(post:, task_flags:)
  overrides = task_flags.is_a?(Hash) ? task_flags : {}
  boolean = ActiveModel::Type::Boolean.new

  resolved = DEFAULT_TASK_FLAGS.deep_dup
  overrides.each do |raw_key, raw_value|
    flag_key = raw_key.to_s.underscore.to_sym
    resolved[flag_key] = boolean.cast(raw_value) if resolved.key?(flag_key)
  end

  video_attached = post.media.attached? && post.media.blob&.content_type.to_s.start_with?("video/")
  resolved[:run_video] = false unless video_attached

  resolved
end
-
-
# Translates task flags into the provider-options hash used by Ai::Runner
# during inline analysis. Comment generation is always deferred to the
# dedicated service, hence include_comment_generation: false.
def inline_provider_options(task_flags:)
  boolean = ActiveModel::Type::Boolean.new

  {
    visual_only: false,
    include_faces: boolean.cast(task_flags[:analyze_faces]),
    include_ocr: boolean.cast(task_flags[:run_ocr]),
    include_comment_generation: false,
    include_video_analysis: boolean.cast(task_flags[:run_video])
  }
end
-
-
# True when comment generation was blocked purely because the profile
# history is incomplete for a retryable reason — i.e. a Build History
# fallback could unblock it. Any error yields false.
def retryable_profile_incomplete_block?(post:, comment_result:)
  boolean = ActiveModel::Type::Boolean.new
  return false unless boolean.cast(comment_result[:blocked]) &&
                      comment_result[:reason_code].to_s == "missing_required_evidence"

  policy = post.metadata.is_a?(Hash) ? post.metadata["comment_generation_policy"] : nil
  return false unless policy.is_a?(Hash)
  return false if boolean.cast(policy["history_ready"])

  PROFILE_INCOMPLETE_REASON_CODES.include?(policy["history_reason_code"].to_s)
rescue StandardError
  false
end
-
-
# Registers a Build History fallback when inline comment generation was
# blocked on an incomplete profile history.
#
# Enqueues (or piggybacks on an already-running) BuildInstagramProfileHistoryJob
# with a resume payload that re-runs this job in inline, comment-only mode
# once history is ready, then records retry bookkeeping under the post's
# metadata["comment_generation_policy"]["retry_state"].
#
# Returns a status hash ({ queued:, reason:, ... }); never raises.
def enqueue_build_history_retry_if_needed!(account:, profile:, post:)
  metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
  policy = metadata["comment_generation_policy"].is_a?(Hash) ? metadata["comment_generation_policy"].deep_dup : {}
  retry_state = policy["retry_state"].is_a?(Hash) ? policy["retry_state"].deep_dup : {}
  attempts = retry_state["attempts"].to_i
  return { queued: false, reason: "retry_attempts_exhausted" } if attempts >= COMMENT_RETRY_MAX_ATTEMPTS

  history_reason_code = policy["history_reason_code"].to_s
  return { queued: false, reason: "history_reason_not_retryable" } unless PROFILE_INCOMPLETE_REASON_CODES.include?(history_reason_code)

  history_result = BuildInstagramProfileHistoryJob.enqueue_with_resume_if_needed!(
    account: account,
    profile: profile,
    trigger_source: "post_inline_comment_fallback",
    requested_by: self.class.name,
    resume_job: {
      job_class: self.class,
      job_kwargs: {
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        instagram_profile_post_id: post.id,
        pipeline_mode: "inline",
        # Comment-only resume: the analysis steps already ran, so only
        # metadata tagging and comment generation are re-enabled.
        task_flags: {
          analyze_visual: false,
          analyze_faces: false,
          run_ocr: false,
          run_video: false,
          run_metadata: true,
          generate_comments: true,
          enforce_comment_evidence_policy: true,
          retry_on_incomplete_profile: true
        }
      }
    }
  )
  return { queued: false, reason: history_result[:reason] } unless ActiveModel::Type::Boolean.new.cast(history_result[:accepted])

  retry_state["attempts"] = attempts + 1
  retry_state["last_reason_code"] = history_reason_code
  retry_state["last_blocked_at"] = Time.current.iso8601(3)
  retry_state["last_enqueued_at"] = Time.current.iso8601(3)
  retry_state["next_run_at"] = history_result[:next_run_at].to_s.presence
  retry_state["job_id"] = history_result[:job_id].to_s.presence
  retry_state["build_history_action_log_id"] = history_result[:action_log_id].to_i if history_result[:action_log_id].present?
  retry_state["source"] = self.class.name
  retry_state["mode"] = "build_history_fallback"

  policy["retry_state"] = retry_state
  policy["updated_at"] = Time.current.iso8601(3)
  metadata["comment_generation_policy"] = policy
  post.update!(metadata: metadata)

  {
    queued: true,
    reason: "build_history_fallback_registered",
    job_id: history_result[:job_id].to_s,
    action_log_id: history_result[:action_log_id],
    next_run_at: history_result[:next_run_at].to_s
  }
rescue StandardError => e
  {
    queued: false,
    reason: "retry_enqueue_failed",
    error_class: e.class.name,
    error_message: e.message.to_s
  }
end
-
end
-
class AppendProfileHistoryNarrativeJob < ApplicationJob
  queue_as :maintenance

  # Appends a narrative entry for a profile event. Best-effort: any failure
  # is logged and swallowed so narrative building never retries or raises.
  def perform(instagram_profile_event_id:, mode: "event", intelligence: nil)
    event = InstagramProfileEvent.find_by(id: instagram_profile_event_id)
    return unless event

    normalized_mode = mode.to_s
    if normalized_mode == "event"
      Ai::ProfileHistoryNarrativeBuilder.append_event!(event)
    elsif normalized_mode == "story_intelligence"
      story_payload = intelligence.is_a?(Hash) ? intelligence.deep_symbolize_keys : {}
      Ai::ProfileHistoryNarrativeBuilder.append_story_intelligence!(event, intelligence: story_payload)
    end
  rescue StandardError => e
    Rails.logger.warn("[AppendProfileHistoryNarrativeJob] failed for event_id=#{instagram_profile_event_id}: #{e.class}: #{e.message}")
    nil
  end
end
-
1
require "json"

# Base class for all background jobs. Adds:
#   * discard-with-broadcast handling for Instagram auth failures,
#   * an around_perform wrapper that sets per-job context, emits structured
#     start/complete/fail logs, broadcasts live job status over ActionCable,
#     and persists failures to BackgroundJobFailure (best-effort).
class ApplicationJob < ActiveJob::Base
  # Automatically retry jobs that encountered a deadlock
  # retry_on ActiveRecord::Deadlocked

  # Most jobs are safe to ignore if the underlying records are no longer available
  # discard_on ActiveJob::DeserializationError

  # Jobs that hit an Instagram auth wall are dropped (retrying cannot help
  # until cookies are refreshed); subscribers are notified so the UI updates.
  discard_on Instagram::AuthenticationRequiredError do |job, error|
    context = Jobs::ContextExtractor.from_active_job_arguments(job.arguments)
    Rails.logger.warn(
      "[jobs.auth_required] #{job.class.name} discarded: #{error.message} " \
      "(account_id=#{context[:instagram_account_id] || '-'}, profile_id=#{context[:instagram_profile_id] || '-'})"
    )

    Ops::LiveUpdateBroadcaster.broadcast!(
      topic: "jobs_changed",
      account_id: context[:instagram_account_id],
      payload: {
        status: "discarded",
        reason: "authentication_required",
        job_class: job.class.name,
        instagram_account_id: context[:instagram_account_id],
        instagram_profile_id: context[:instagram_profile_id],
        instagram_profile_post_id: context[:instagram_profile_post_id]
      },
      throttle_key: "jobs_changed"
    )
  end

  around_perform do |job, block|
    context = Jobs::ContextExtractor.from_active_job_arguments(job.arguments)
    started_at = Time.current
    # Monotonic clock for duration measurement; nil if unavailable.
    started_monotonic = Process.clock_gettime(Process::CLOCK_MONOTONIC) rescue nil

    Current.set(
      active_job_id: job.job_id,
      provider_job_id: job.provider_job_id,
      job_class: job.class.name,
      queue_name: job.queue_name,
      instagram_account_id: context[:instagram_account_id],
      instagram_profile_id: context[:instagram_profile_id]
    ) do
      Ai::ApiUsageTracker.with_context(
        active_job_id: job.job_id,
        provider_job_id: job.provider_job_id,
        job_class: job.class.name,
        queue_name: job.queue_name,
        instagram_account_id: context[:instagram_account_id],
        instagram_profile_id: context[:instagram_profile_id]
      ) do
        Ops::StructuredLogger.info(
          event: "job.started",
          payload: {
            active_job_id: job.job_id,
            job_class: job.class.name,
            queue_name: job.queue_name,
            instagram_account_id: context[:instagram_account_id],
            instagram_profile_id: context[:instagram_profile_id]
          }
        )

        Ops::LiveUpdateBroadcaster.broadcast!(
          topic: "jobs_changed",
          account_id: context[:instagram_account_id],
          payload: {
            status: "started",
            job_class: job.class.name,
            active_job_id: job.job_id,
            instagram_account_id: context[:instagram_account_id],
            instagram_profile_id: context[:instagram_profile_id],
            instagram_profile_post_id: context[:instagram_profile_post_id]
          },
          throttle_key: "jobs_changed"
        )

        # Run the actual job body.
        block.call

        duration_ms =
          if started_monotonic
            ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_monotonic) * 1000).round
          end

        Ops::StructuredLogger.info(
          event: "job.completed",
          payload: {
            active_job_id: job.job_id,
            job_class: job.class.name,
            queue_name: job.queue_name,
            instagram_account_id: context[:instagram_account_id],
            instagram_profile_id: context[:instagram_profile_id],
            duration_ms: duration_ms
          }
        )

        Ops::LiveUpdateBroadcaster.broadcast!(
          topic: "jobs_changed",
          account_id: context[:instagram_account_id],
          payload: {
            status: "completed",
            job_class: job.class.name,
            active_job_id: job.job_id,
            instagram_account_id: context[:instagram_account_id],
            instagram_profile_id: context[:instagram_profile_id],
            instagram_profile_post_id: context[:instagram_profile_post_id]
          },
          throttle_key: "jobs_changed"
        )
      end
    end
  rescue StandardError => e
    begin
      queue_adapter = Rails.application.config.active_job.queue_adapter.to_s
      # Resolve the backend job row id when running on Solid Queue, so the
      # failure record can be cross-referenced.
      solid_id =
        begin
          if queue_adapter == "solid_queue"
            SolidQueue::Job.find_by(active_job_id: job.job_id)&.id
          end
        rescue StandardError
          nil
        end

      failure = BackgroundJobFailure.create!(
        active_job_id: job.job_id,
        queue_name: job.queue_name,
        job_class: job.class.name,
        arguments_json: job.send(:safe_json, job.arguments),
        provider_job_id: job.provider_job_id,
        solid_queue_job_id: solid_id,
        instagram_account_id: context[:instagram_account_id],
        instagram_profile_id: context[:instagram_profile_id],
        error_class: e.class.name,
        error_message: e.message.to_s,
        backtrace: Array(e.backtrace).join("\n"),
        failure_kind: job.send(:failure_kind_for, e),
        retryable: job.send(:retryable_for, e),
        occurred_at: Time.current,
        metadata: {
          queue_backend: queue_adapter,
          instagram_account_id: context[:instagram_account_id],
          instagram_profile_id: context[:instagram_profile_id],
          job_scope: context[:job_scope],
          context_label: context[:context_label],
          started_at: started_at&.iso8601,
          failed_at: Time.current.iso8601,
          duration_ms: ((Time.current - started_at) * 1000).round,
          locale: job.locale,
          timezone: job.timezone,
          executions: job.executions,
          exception_executions: job.exception_executions
        }
      )

      Ops::IssueTracker.record_job_failure!(
        job: job,
        exception: e,
        context: context,
        failure_record: failure
      )

      Ops::StructuredLogger.error(
        event: "job.failed",
        payload: {
          active_job_id: job.job_id,
          job_class: job.class.name,
          queue_name: job.queue_name,
          instagram_account_id: context[:instagram_account_id],
          instagram_profile_id: context[:instagram_profile_id],
          error_class: e.class.name,
          error_message: e.message,
          failure_kind: failure.failure_kind,
          retryable: failure.retryable?
        }
      )

      Ops::LiveUpdateBroadcaster.broadcast!(
        topic: "jobs_changed",
        account_id: context[:instagram_account_id],
        payload: {
          status: "failed",
          job_class: job.class.name,
          active_job_id: job.job_id,
          failure_kind: failure.failure_kind,
          instagram_account_id: context[:instagram_account_id],
          instagram_profile_id: context[:instagram_profile_id],
          instagram_profile_post_id: context[:instagram_profile_post_id]
        },
        throttle_key: "jobs_changed"
      )
    rescue StandardError
      # Never let failure logging take down job execution error reporting.
      nil
    end

    raise
  end

  private

  # Serializes job arguments for persistence; falls back to a sentinel when
  # the arguments are not JSON-serializable.
  def safe_json(value)
    JSON.generate(value)
  rescue StandardError
    JSON.generate({ error: "unable_to_serialize_arguments" })
  end

  # Classifies an error as "authentication", "transient" (network-ish), or
  # generic "runtime" for failure records.
  def failure_kind_for(error)
    return "authentication" if authentication_error?(error)
    return "transient" if transient_error?(error)

    "runtime"
  end

  # Authentication failures cannot succeed on retry; everything else may.
  def retryable_for(error)
    !authentication_error?(error)
  end

  # True for well-known network/driver timeout classes. Class names are
  # resolved lazily so optional dependencies (e.g. selenium) need not load.
  def transient_error?(error)
    classes = [
      "Net::OpenTimeout",
      "Net::ReadTimeout",
      "Errno::ECONNRESET",
      "Errno::ECONNREFUSED",
      "Selenium::WebDriver::Error::TimeoutError"
    ].filter_map(&:safe_constantize)
    classes.any? { |klass| error.is_a?(klass) }
  rescue StandardError
    false
  end

  # Detects auth failures either by class or by known message fragments.
  def authentication_error?(error)
    return true if error.is_a?(Instagram::AuthenticationRequiredError)

    msg = error.message.to_s.downcase
    msg.include?("stored cookies are not authenticated") ||
      msg.include?("authentication required") ||
      msg.include?("no stored cookies")
  end
end
-
class AutoEngageHomeFeedJob < ApplicationJob
  queue_as :engagements

  # Runs best-effort auto engagement on the account's home feed and
  # broadcasts a success or failure notification to the UI.
  #
  # max_posts:: maximum number of feed posts to comment on.
  # include_story:: whether to also reply to a story.
  # story_hold_seconds:: how long to hold the story open before replying.
  #
  # Re-raises any client error after broadcasting the alert, so ActiveJob's
  # failure handling still records it.
  def perform(instagram_account_id:, max_posts: 3, include_story: true, story_hold_seconds: 18)
    account = InstagramAccount.find_by(id: instagram_account_id)
    unless account
      Ops::StructuredLogger.info(
        event: "feed_auto_engagement.skipped_missing_account",
        payload: { instagram_account_id: instagram_account_id }
      )
      return
    end

    result = Instagram::Client.new(account: account).auto_engage_home_feed!(
      max_posts: max_posts,
      include_story: include_story,
      story_hold_seconds: story_hold_seconds
    )

    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: {
        kind: "notice",
        message: "Auto engagement completed for #{account.username}: posts_commented=#{result[:posts_commented]}, story_replied=#{result[:story_replied]}."
      }
    )
  rescue StandardError => e
    # `account` may already be set if the failure happened after lookup;
    # find_by is the idiomatic equivalent of where(id:).first.
    account ||= InstagramAccount.find_by(id: instagram_account_id)
    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "alert", message: "Auto engagement failed: #{e.message}" }
    ) if account
    raise
  end
end
-
1
require "digest"
-
1
require "json"
-
1
require "set"
-
-
1
class BuildInstagramProfileHistoryJob < ApplicationJob
-
1
queue_as :ai
-
-
PROFILE_INCOMPLETE_REASON_CODES =
-
1
then: 1
if defined?(ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES)
-
1
ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES
-
else: 0
else
-
%w[
-
latest_posts_not_analyzed
-
insufficient_analyzed_posts
-
no_recent_posts_available
-
missing_structured_post_signals
-
profile_preparation_failed
-
profile_preparation_error
-
].freeze
-
end
-
-
1
MAX_RETRY_ATTEMPTS = ENV.fetch("PROFILE_HISTORY_BUILD_MAX_RETRY_ATTEMPTS", 8).to_i.clamp(1, 30)
-
1
SHORT_RETRY_WAIT_MINUTES = ENV.fetch("PROFILE_HISTORY_BUILD_RETRY_WAIT_MINUTES", 45).to_i.clamp(10, 240)
-
1
FACE_REFRESH_RETRY_WAIT_MINUTES = ENV.fetch("PROFILE_HISTORY_BUILD_FACE_REFRESH_RETRY_WAIT_MINUTES", 15).to_i.clamp(5, 120)
-
1
LONG_RETRY_WAIT_HOURS = ENV.fetch("PROFILE_HISTORY_BUILD_RETRY_WAIT_HOURS", 4).to_i.clamp(1, 24)
-
1
ACTIVE_LOG_LOOKBACK_HOURS = ENV.fetch("PROFILE_HISTORY_BUILD_ACTIVE_LOG_LOOKBACK_HOURS", 12).to_i.clamp(1, 72)
-
-
1
class << self
-
1
def enqueue_with_resume_if_needed!(account:, profile:, trigger_source:, requested_by:, resume_job: nil)
-
else: 0
then: 0
raise ArgumentError, "account is required" unless account
-
else: 0
then: 0
raise ArgumentError, "profile is required" unless profile
-
-
serialized_resume = serialize_resume_job(resume_job)
-
active_log = active_build_history_log(profile: profile)
-
-
then: 0
else: 0
if active_log
-
register_pending_resume_jobs!(log: active_log, jobs: [ serialized_resume ].compact, requested_by: requested_by)
-
return {
-
accepted: true,
-
queued: false,
-
registered: serialized_resume.present?,
-
reason: "build_history_already_running",
-
action_log_id: active_log.id,
-
job_id: active_log.active_job_id,
-
then: 0
else: 0
next_run_at: active_log.metadata.is_a?(Hash) ? active_log.metadata.dig("retry", "next_run_at") : nil
-
}
-
end
-
-
metadata = {
-
requested_by: requested_by.to_s.presence || name,
-
trigger_source: trigger_source.to_s.presence || "system"
-
}
-
then: 0
else: 0
metadata["pending_resume_jobs"] = [ serialized_resume ] if serialized_resume
-
-
log = profile.instagram_profile_action_logs.create!(
-
instagram_account: account,
-
action: "build_history",
-
status: "queued",
-
trigger_source: trigger_source.to_s.presence || "system",
-
occurred_at: Time.current,
-
metadata: metadata
-
)
-
-
job = perform_later(
-
instagram_account_id: account.id,
-
instagram_profile_id: profile.id,
-
profile_action_log_id: log.id
-
)
-
log.update!(active_job_id: job.job_id, queue_name: job.queue_name)
-
-
{
-
accepted: true,
-
queued: true,
-
registered: serialized_resume.present?,
-
reason: "build_history_queued",
-
action_log_id: log.id,
-
job_id: job.job_id,
-
next_run_at: nil
-
}
-
rescue StandardError => e
-
{
-
accepted: false,
-
queued: false,
-
registered: false,
-
reason: "build_history_enqueue_failed",
-
error_class: e.class.name,
-
error_message: e.message.to_s
-
}
-
end
-
-
1
def serialize_resume_job(resume_job)
-
3
else: 0
then: 3
return nil unless resume_job.is_a?(Hash)
-
-
raw_job_class = resume_job[:job_class] || resume_job["job_class"]
-
raw_kwargs = resume_job[:job_kwargs] || resume_job["job_kwargs"]
-
job_class_name =
-
case raw_job_class
-
when: 0
when Class
-
raw_job_class.name
-
else: 0
else
-
raw_job_class.to_s
-
end
-
then: 0
else: 0
return nil if job_class_name.blank?
-
-
then: 0
else: 0
kwargs = raw_kwargs.is_a?(Hash) ? raw_kwargs.deep_stringify_keys : {}
-
{
-
"job_class" => job_class_name,
-
"job_kwargs" => kwargs,
-
"fingerprint" => resume_fingerprint(job_class_name: job_class_name, job_kwargs: kwargs),
-
"registered_at" => Time.current.iso8601(3)
-
}
-
rescue StandardError
-
nil
-
end
-
-
1
private
-
-
1
def active_build_history_log(profile:)
-
profile.instagram_profile_action_logs
-
.where(action: "build_history", status: %w[queued running])
-
.where("created_at >= ?", ACTIVE_LOG_LOOKBACK_HOURS.hours.ago)
-
.order(created_at: :desc)
-
.first
-
end
-
-
1
def register_pending_resume_jobs!(log:, jobs:, requested_by:)
-
valid_jobs = Array(jobs).select { |row| row.is_a?(Hash) }
-
then: 0
else: 0
return if valid_jobs.empty?
-
-
log.with_lock do
-
then: 0
else: 0
metadata = log.metadata.is_a?(Hash) ? log.metadata.deep_dup : {}
-
pending = Array(metadata["pending_resume_jobs"]).select { |row| row.is_a?(Hash) }
-
existing_fingerprints = pending.map { |row| row["fingerprint"].to_s }.reject(&:blank?).to_set
-
-
valid_jobs.each do |row|
-
fingerprint = row["fingerprint"].to_s
-
then: 0
else: 0
next if fingerprint.present? && existing_fingerprints.include?(fingerprint)
-
-
pending << row
-
then: 0
else: 0
existing_fingerprints << fingerprint if fingerprint.present?
-
end
-
-
metadata["pending_resume_jobs"] = pending
-
metadata["last_resume_registration_at"] = Time.current.iso8601(3)
-
then: 0
else: 0
metadata["requested_by"] = requested_by.to_s if requested_by.to_s.present?
-
log.update!(metadata: metadata)
-
end
-
rescue StandardError
-
nil
-
end
-
-
1
def resume_fingerprint(job_class_name:, job_kwargs:)
-
normalized = normalize_for_fingerprint(job_kwargs)
-
Digest::SHA256.hexdigest("#{job_class_name}:#{JSON.generate(normalized)}")
-
rescue StandardError
-
Digest::SHA256.hexdigest("#{job_class_name}:#{job_kwargs}")
-
end
-
-
1
def normalize_for_fingerprint(value)
-
case value
-
when: 0
when Hash
-
value.keys.map(&:to_s).sort.each_with_object({}) do |key, hash|
-
hash[key] = normalize_for_fingerprint(value[key] || value[key.to_sym])
-
end
-
when: 0
when Array
-
value.map { |row| normalize_for_fingerprint(row) }
-
else: 0
else
-
value
-
end
-
end
-
end
-
-
1
def perform(instagram_account_id:, instagram_profile_id:, profile_action_log_id: nil, attempts: 0, resume_job: nil)
-
2
account = InstagramAccount.find_by(id: instagram_account_id)
-
2
else: 2
then: 0
unless account
-
Ops::StructuredLogger.info(
-
event: "profile_history_build.skipped_missing_account",
-
payload: {
-
instagram_account_id: instagram_account_id,
-
instagram_profile_id: instagram_profile_id
-
}
-
)
-
return
-
end
-
-
2
profile = account.instagram_profiles.find_by(id: instagram_profile_id)
-
2
else: 2
then: 0
unless profile
-
Ops::StructuredLogger.info(
-
event: "profile_history_build.skipped_missing_profile",
-
payload: {
-
instagram_account_id: account.id,
-
instagram_profile_id: instagram_profile_id
-
}
-
)
-
return
-
end
-
-
2
action_log = find_or_create_action_log(
-
account: account,
-
profile: profile,
-
profile_action_log_id: profile_action_log_id
-
)
-
2
register_incoming_resume_job!(action_log: action_log, resume_job: resume_job)
-
2
action_log.mark_running!(extra_metadata: {
-
queue_name: queue_name,
-
active_job_id: job_id,
-
attempts: attempts.to_i
-
})
-
-
2
result = Ai::ProfileHistoryBuildService.new(account: account, profile: profile).execute!
-
2
then: 2
else: 0
history_state = result[:history_state].is_a?(Hash) ? result[:history_state] : {}
-
2
reason_code = result[:reason_code].to_s
-
2
reason = result[:reason].to_s
-
2
status = result[:status].to_s
-
payload = {
-
2
attempts: attempts.to_i,
-
status: status,
-
reason_code: reason_code.presence,
-
reason: reason.presence,
-
history_build: history_state
-
}.compact
-
-
2
case status
-
when: 1
when "ready"
-
1
resume_state = enqueue_pending_resume_jobs!(action_log: action_log, resume_job: resume_job)
-
1
action_log.mark_succeeded!(
-
extra_metadata: payload.merge(
-
resume: resume_state
-
),
-
log_text: "History Ready for #{profile.username}."
-
)
-
when: 0
when "blocked"
-
action_log.mark_succeeded!(
-
extra_metadata: payload.merge(skipped: true),
-
log_text: reason.presence || "History build skipped by policy."
-
)
-
else: 1
else
-
1
retry_state = schedule_retry!(
-
account: account,
-
profile: profile,
-
action_log: action_log,
-
attempts: attempts.to_i,
-
reason_code: reason_code
-
)
-
1
then: 1
if retry_state[:queued]
-
1
queue_payload = payload.merge(
-
retry: {
-
queued: true,
-
next_run_at: retry_state[:next_run_at].iso8601(3),
-
retry_job_id: retry_state[:job_id],
-
wait_seconds: retry_state[:wait_seconds]
-
}
-
)
-
1
action_log.update!(
-
status: "queued",
-
finished_at: nil,
-
metadata: merge_metadata(action_log.metadata, queue_payload),
-
error_message: nil,
-
log_text: "History build pending (#{reason_code.presence || 'in_progress'}). Retry scheduled at #{retry_state[:next_run_at].in_time_zone.iso8601}."
-
)
-
else: 0
else
-
exhausted_payload = payload.merge(
-
retry: retry_state.except(:queued)
-
)
-
action_log.mark_failed!(
-
error_message: "History build pending and retry unavailable (#{reason_code.presence || retry_state[:reason]}).",
-
extra_metadata: exhausted_payload
-
)
-
end
-
end
-
rescue StandardError => e
-
then: 0
else: 0
action_log&.mark_failed!(
-
error_message: e.message,
-
extra_metadata: {
-
active_job_id: job_id,
-
attempts: attempts.to_i
-
}
-
)
-
raise
-
end
-
-
1
private
-
-
1
def find_or_create_action_log(account:, profile:, profile_action_log_id:)
-
2
then: 2
else: 0
log = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id) if profile_action_log_id.present?
-
2
then: 2
else: 0
return log if log
-
-
profile.instagram_profile_action_logs.create!(
-
instagram_account: account,
-
action: "build_history",
-
status: "queued",
-
trigger_source: "job",
-
occurred_at: Time.current,
-
active_job_id: job_id,
-
queue_name: queue_name,
-
metadata: { created_by: self.class.name }
-
)
-
end
-
-
1
def schedule_retry!(account:, profile:, action_log:, attempts:, reason_code:)
-
1
then: 0
else: 1
return { queued: false, reason: "max_attempts_reached" } if attempts >= MAX_RETRY_ATTEMPTS
-
-
1
wait_seconds = retry_wait_seconds_for(reason_code: reason_code)
-
1
run_at = Time.current + wait_seconds.seconds
-
1
job = self.class.set(wait_until: run_at).perform_later(
-
instagram_account_id: account.id,
-
instagram_profile_id: profile.id,
-
profile_action_log_id: action_log.id,
-
attempts: attempts + 1
-
)
-
-
1
{
-
queued: true,
-
wait_seconds: wait_seconds,
-
next_run_at: run_at,
-
job_id: job.job_id
-
}
-
rescue StandardError => e
-
{
-
queued: false,
-
reason: "retry_enqueue_failed",
-
error_class: e.class.name,
-
error_message: e.message.to_s
-
}
-
end
-
-
1
def retry_wait_seconds_for(reason_code:)
-
1
code = reason_code.to_s
-
1
then: 0
if code == "waiting_for_face_refresh"
-
else: 1
FACE_REFRESH_RETRY_WAIT_MINUTES.minutes.to_i
-
1
then: 1
elsif PROFILE_INCOMPLETE_REASON_CODES.include?(code)
-
1
LONG_RETRY_WAIT_HOURS.hours.to_i
-
else: 0
else
-
SHORT_RETRY_WAIT_MINUTES.minutes.to_i
-
end
-
end
-
-
1
def merge_metadata(base, extra)
-
1
then: 1
else: 0
current = base.is_a?(Hash) ? base : {}
-
1
current.merge(extra.to_h)
-
end
-
-
1
def register_incoming_resume_job!(action_log:, resume_job:)
-
2
serialized = self.class.send(:serialize_resume_job, resume_job)
-
2
else: 0
then: 2
return unless serialized
-
-
self.class.send(
-
:register_pending_resume_jobs!,
-
log: action_log,
-
jobs: [ serialized ],
-
then: 0
else: 0
requested_by: action_log.metadata.is_a?(Hash) ? action_log.metadata["requested_by"] : nil
-
)
-
rescue StandardError
-
nil
-
end
-
-
1
def enqueue_pending_resume_jobs!(action_log:, resume_job:)
-
1
additional = self.class.send(:serialize_resume_job, resume_job)
-
1
then: 1
else: 0
pending = Array(action_log.metadata.is_a?(Hash) ? action_log.metadata["pending_resume_jobs"] : nil)
-
1
pending = pending.select { |row| row.is_a?(Hash) }
-
1
then: 0
else: 1
pending << additional if additional
-
1
pending = dedupe_resume_jobs(rows: pending)
-
1
then: 1
else: 0
return { pending_count: 0, resumed_count: 0, failed_count: 0, failures: [] } if pending.empty?
-
-
resumed = []
-
failures = []
-
still_pending = []
-
-
pending.each do |row|
-
job_class_name = row["job_class"].to_s
-
job_class = job_class_name.safe_constantize
-
else: 0
then: 0
unless job_class.respond_to?(:perform_later)
-
failure = row.merge(
-
"error_class" => "UnresumableJobClass",
-
"error_message" => "Job class not found or not resumable: #{job_class_name}",
-
"failed_at" => Time.current.iso8601(3)
-
)
-
failures << failure
-
still_pending << row
-
next
-
end
-
-
then: 0
else: 0
kwargs = row["job_kwargs"].is_a?(Hash) ? row["job_kwargs"].deep_symbolize_keys : {}
-
job = job_class.perform_later(**kwargs)
-
resumed << row.merge(
-
"resumed_job_id" => job.job_id,
-
"resumed_queue_name" => job.queue_name,
-
"resumed_at" => Time.current.iso8601(3)
-
)
-
rescue StandardError => e
-
failure = row.merge(
-
"error_class" => e.class.name,
-
"error_message" => e.message.to_s,
-
"failed_at" => Time.current.iso8601(3)
-
)
-
failures << failure
-
still_pending << row
-
end
-
-
action_log.with_lock do
-
then: 0
else: 0
metadata = action_log.metadata.is_a?(Hash) ? action_log.metadata.deep_dup : {}
-
existing_resumed = Array(metadata["resumed_jobs"]).select { |row| row.is_a?(Hash) }
-
metadata["resumed_jobs"] = (existing_resumed + resumed).last(60)
-
metadata["pending_resume_jobs"] = still_pending
-
then: 0
else: 0
metadata["resume_failures"] = failures.first(20) if failures.any?
-
metadata["last_resume_attempt_at"] = Time.current.iso8601(3)
-
action_log.update!(metadata: metadata)
-
end
-
-
{
-
pending_count: pending.length,
-
resumed_count: resumed.length,
-
failed_count: failures.length,
-
failures: failures.first(20),
-
resumed_job_ids: resumed.map { |row| row["resumed_job_id"] }.compact.first(30)
-
}
-
rescue StandardError => e
-
{
-
pending_count: 0,
-
resumed_count: 0,
-
failed_count: 1,
-
failures: [
-
{
-
"error_class" => e.class.name,
-
"error_message" => e.message.to_s
-
}
-
]
-
}
-
end
-
-
1
# Removes duplicate resume-job rows, keyed by a content fingerprint.
#
# Rows that already carry a "fingerprint" keep it; rows without one get a
# SHA-256 digest of "job_class:job_kwargs" computed for them. Non-Hash
# entries are dropped. Returns a new array in which every row carries its
# fingerprint and each fingerprint appears at most once (first occurrence
# wins).
def dedupe_resume_jobs(rows:)
  deduped = []
  known_fingerprints = Set.new

  Array(rows).each do |row|
    next unless row.is_a?(Hash)

    fingerprint = row["fingerprint"].to_s
    fingerprint = Digest::SHA256.hexdigest("#{row['job_class']}:#{row['job_kwargs']}") if fingerprint.blank?
    # Set#add? returns nil when the fingerprint was already present.
    next unless known_fingerprints.add?(fingerprint)

    deduped << row.merge("fingerprint" => fingerprint)
  end

  deduped
end
-
end
-
# Captures posts from the account's Instagram home feed via Instagram::Client
# and reports the outcome to the account's Turbo Stream notification area.
class CaptureHomeFeedJob < ApplicationJob
  queue_as :sync

  # rounds: feed refresh passes; delay_seconds: pause between passes;
  # max_new: cap on newly persisted posts for this run.
  # On failure a best-effort alert is broadcast and the error is re-raised
  # so ActiveJob retry/failure handling still applies.
  def perform(instagram_account_id:, rounds: 4, delay_seconds: 45, max_new: 20)
    account = InstagramAccount.find(instagram_account_id)
    capture = Instagram::Client.new(account: account)
                               .capture_home_feed_posts!(rounds: rounds, delay_seconds: delay_seconds, max_new: max_new)

    notify(
      account,
      kind: "notice",
      message: "Feed capture completed for #{account.username}: new=#{capture[:new_posts]}, seen=#{capture[:seen_posts]}."
    )
  rescue StandardError => e
    # `account` may be nil when the initial find raised; re-resolve leniently.
    account ||= InstagramAccount.where(id: instagram_account_id).first
    notify(account, kind: "alert", message: "Feed capture failed: #{e.message}") if account
    raise
  end

  private

  # Appends a flash-style notification partial to the account's stream.
  def notify(account, kind:, message:)
    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: kind, message: message }
    )
  end
end
-
-
require "stringio"
-
require "net/http"
-
-
# Captures the recent posts of one Instagram profile for an account:
# persists post rows via Instagram::ProfileAnalysisCollector, records
# capture/restore/delete events on the profile, and queues media-download
# jobs so the newest posts end up with attachments.
#
# Progress is tracked on an InstagramProfileActionLog row (queued ->
# running -> succeeded/failed) and mirrored to structured logs plus a
# Turbo Stream notification.
class CaptureInstagramProfilePostsJob < ApplicationJob
  queue_as :post_downloads

  # How many of the newest downloadable posts we aim to have media for.
  DOWNLOAD_TARGET_RECENT_POSTS = 50
  # Upper bound on posts requested from the remote feed per capture run.
  CAPTURE_FETCH_LIMIT = 120

  # Transient network failures retry with polynomial backoff.
  retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 4
  retry_on Errno::ECONNRESET, Errno::ECONNREFUSED, wait: :polynomially_longer, attempts: 4
  retry_on Timeout::Error, wait: :polynomially_longer, attempts: 3

  # @param instagram_account_id [Integer] owning account id (raises RecordNotFound if missing)
  # @param instagram_profile_id [Integer] profile scoped to that account
  # @param profile_action_log_id [Integer, nil] reuse an existing action log row if given
  # @param comments_limit [Integer] per-post comment fetch cap, clamped to 1..30
  def perform(instagram_account_id:, instagram_profile_id:, profile_action_log_id: nil, comments_limit: 20)
    account = InstagramAccount.find(instagram_account_id)
    profile = account.instagram_profiles.find(instagram_profile_id)
    # Clamp defends against callers passing 0/negative/huge values.
    comments_limit_i = comments_limit.to_i.clamp(1, 30)
    action_log = find_or_create_action_log(
      account: account,
      profile: profile,
      profile_action_log_id: profile_action_log_id
    )
    action_log.mark_running!(extra_metadata: {
      queue_name: queue_name,
      active_job_id: job_id,
      comments_limit: comments_limit_i
    })

    Ops::StructuredLogger.info(
      event: "profile_posts_capture.started",
      payload: {
        active_job_id: job_id,
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        profile_username: profile.username,
        comments_limit: comments_limit_i
      }
    )

    # Policy gate: some profiles (non-personal pages, excluded tags, too many
    # followers) are skipped entirely; the run still ends "succeeded" with a
    # skip reason recorded on the action log.
    policy_decision = Instagram::ProfileScanPolicy.new(profile: profile).decision
    if policy_decision[:skip_scan]
      if policy_decision[:reason_code].to_s == "non_personal_profile_page" || policy_decision[:reason_code].to_s == "scan_excluded_tag"
        # Persist the exclusion so future runs can skip without re-evaluating.
        Instagram::ProfileScanPolicy.mark_scan_excluded!(profile: profile)
      end

      action_log.mark_succeeded!(
        extra_metadata: {
          skipped: true,
          skip_reason_code: policy_decision[:reason_code],
          skip_reason: policy_decision[:reason],
          followers_count: policy_decision[:followers_count],
          max_followers: policy_decision[:max_followers]
        },
        log_text: "Skipped profile post capture: #{policy_decision[:reason]}"
      )
      return
    end

    # Fetch and persist posts. Media download is deferred (download_media:
    # false) — it is queued separately below via the download plan.
    collected = Instagram::ProfileAnalysisCollector.new(account: account, profile: profile).collect_and_persist!(
      posts_limit: CAPTURE_FETCH_LIMIT,
      comments_limit: comments_limit_i,
      track_missing_as_deleted: true,
      sync_source: "profile_posts_manual_capture",
      download_media: false
    )

    persisted_posts = Array(collected[:posts])
    # Defensive: the collector's summary is trusted only if it is a Hash.
    summary = collected[:summary].is_a?(Hash) ? collected[:summary] : {}
    created_shortcodes = Array(summary[:created_shortcodes])
    updated_shortcodes = Array(summary[:updated_shortcodes])
    restored_shortcodes = Array(summary[:restored_shortcodes])
    deleted_shortcodes = Array(summary[:deleted_shortcodes])

    event_counts = create_post_capture_events!(
      profile: profile,
      posts: persisted_posts,
      created_shortcodes: created_shortcodes,
      restored_shortcodes: restored_shortcodes,
      deleted_shortcodes: deleted_shortcodes
    )

    download_plan = build_download_plan(profile: profile)
    queued_downloads = enqueue_profile_post_downloads!(
      account: account,
      profile: profile,
      posts: download_plan[:to_queue]
    )

    profile.update!(last_synced_at: Time.current)

    Ops::StructuredLogger.info(
      event: "profile_posts_capture.completed",
      payload: {
        active_job_id: job_id,
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        profile_username: profile.username,
        fetched_posts: persisted_posts.length,
        created_count: summary[:created_count].to_i,
        restored_count: summary[:restored_count].to_i,
        updated_count: summary[:updated_count].to_i,
        unchanged_count: summary[:unchanged_count].to_i,
        deleted_count: summary[:deleted_count].to_i,
        recent_download_target: DOWNLOAD_TARGET_RECENT_POSTS,
        recent_downloadable_posts: download_plan[:recent_candidates].length,
        recent_already_downloaded: download_plan[:already_downloaded_count],
        recent_missing_downloads: download_plan[:missing_count],
        queued_download_jobs: queued_downloads[:queued_count],
        queue_failures: queued_downloads[:failures].length,
        captured_events_count: event_counts[:captured],
        deleted_events_count: event_counts[:deleted],
        restored_events_count: event_counts[:restored],
        downloadable_manifest_count: download_plan[:manifest].length
      }
    )

    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: {
        kind: "notice",
        message: "Post capture completed for #{profile.username}. New: #{summary[:created_count].to_i}, restored: #{summary[:restored_count].to_i}, deleted flagged: #{summary[:deleted_count].to_i}, queued downloads: #{queued_downloads[:queued_count]}, already downloaded in recent set: #{download_plan[:already_downloaded_count]}/#{DOWNLOAD_TARGET_RECENT_POSTS}."
      }
    )

    action_log.mark_succeeded!(
      extra_metadata: {
        fetched_posts: persisted_posts.length,
        created_count: summary[:created_count].to_i,
        restored_count: summary[:restored_count].to_i,
        updated_count: summary[:updated_count].to_i,
        unchanged_count: summary[:unchanged_count].to_i,
        deleted_count: summary[:deleted_count].to_i,
        feed_fetch: summary[:feed_fetch].is_a?(Hash) ? summary[:feed_fetch] : {},
        # Shortcode lists are truncated to keep the metadata payload bounded.
        created_shortcodes: created_shortcodes.first(40),
        updated_shortcodes: updated_shortcodes.first(40),
        restored_shortcodes: restored_shortcodes.first(40),
        deleted_shortcodes: deleted_shortcodes.first(40),
        recent_download_target: DOWNLOAD_TARGET_RECENT_POSTS,
        recent_downloadable_posts: download_plan[:recent_candidates].length,
        recent_already_downloaded: download_plan[:already_downloaded_count],
        recent_missing_downloads: download_plan[:missing_count],
        queued_download_jobs: queued_downloads[:queued_count],
        queued_download_post_ids: queued_downloads[:post_ids].first(DOWNLOAD_TARGET_RECENT_POSTS),
        queue_failures: queued_downloads[:failures].first(20),
        download_manifest: download_plan[:manifest].first(DOWNLOAD_TARGET_RECENT_POSTS),
        captured_events_count: event_counts[:captured]
      },
      log_text: "Captured posts (new=#{summary[:created_count].to_i}, restored=#{summary[:restored_count].to_i}, updated=#{summary[:updated_count].to_i}, deleted=#{summary[:deleted_count].to_i}, queued_downloads=#{queued_downloads[:queued_count]}, already_downloaded_recent=#{download_plan[:already_downloaded_count]})."
    )
  rescue StandardError => e
    Ops::StructuredLogger.error(
      event: "profile_posts_capture.failed",
      payload: {
        active_job_id: job_id,
        # account/profile may be nil if the initial lookups raised.
        instagram_account_id: account&.id,
        instagram_profile_id: profile&.id,
        error_class: e.class.name,
        error_message: e.message.to_s
      }
    )
    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "alert", message: "Profile post capture failed: #{e.message}" }
    ) if account
    action_log&.mark_failed!(error_message: e.message, extra_metadata: { active_job_id: job_id })
    # Re-raise so retry_on / failure reporting still see the error.
    raise
  end

  private

  # Reuses the caller-provided action-log row when it exists, otherwise
  # creates a fresh "queued" log for this capture run.
  def find_or_create_action_log(account:, profile:, profile_action_log_id:)
    log = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id) if profile_action_log_id.present?
    return log if log

    profile.instagram_profile_action_logs.create!(
      instagram_account: account,
      action: "capture_profile_posts",
      status: "queued",
      trigger_source: "job",
      occurred_at: Time.current,
      active_job_id: job_id,
      queue_name: queue_name,
      metadata: { created_by: self.class.name }
    )
  end

  # Records profile events for created / restored / deleted-detected posts.
  # Returns a counts hash: { captured:, deleted:, restored: }.
  def create_post_capture_events!(profile:, posts:, created_shortcodes:, restored_shortcodes:, deleted_shortcodes:)
    by_shortcode = posts.index_by { |post| post.shortcode.to_s }
    counts = { captured: 0, deleted: 0, restored: 0 }

    created_shortcodes.each do |shortcode|
      # Prefer the in-memory post; fall back to a DB lookup by shortcode.
      post = by_shortcode[shortcode.to_s] || profile.instagram_profile_posts.find_by(shortcode: shortcode.to_s)
      next unless post

      # external_id has no timestamp here, so a re-capture of the same
      # shortcode presumably dedupes against the earlier event — confirm
      # against record_event!'s uniqueness semantics.
      event = profile.record_event!(
        kind: "profile_post_captured",
        external_id: "profile_post_captured:#{post.shortcode}",
        occurred_at: post.taken_at || Time.current,
        metadata: profile_post_event_metadata(post: post, reason: "new_capture")
      )
      attach_post_media_to_event(event: event, post: post)
      counts[:captured] += 1
    end

    restored_shortcodes.each do |shortcode|
      post = by_shortcode[shortcode.to_s] || profile.instagram_profile_posts.find_by(shortcode: shortcode.to_s)
      next unless post

      # Timestamped external_id: each restoration is its own event.
      profile.record_event!(
        kind: "profile_post_restored",
        external_id: "profile_post_restored:#{post.shortcode}:#{Time.current.utc.iso8601(6)}",
        occurred_at: Time.current,
        metadata: profile_post_event_metadata(post: post, reason: "restored_in_capture")
      )
      counts[:restored] += 1
    end

    deleted_shortcodes.each do |shortcode|
      # Deleted posts may no longer exist locally; the event is recorded
      # either way (post id is optional in the metadata).
      post = profile.instagram_profile_posts.find_by(shortcode: shortcode.to_s)
      profile.record_event!(
        kind: "profile_post_deleted_detected",
        external_id: "profile_post_deleted_detected:#{shortcode}:#{Time.current.utc.iso8601(6)}",
        occurred_at: Time.current,
        metadata: {
          source: "profile_posts_manual_capture",
          shortcode: shortcode,
          instagram_profile_post_id: post&.id,
          deleted_from_source: true,
          preserved_in_history: true
        }
      )
      counts[:deleted] += 1
    end

    counts
  end

  # Builds the media-download work list for the profile's newest posts.
  # Returns recent candidates, how many already have media, which posts to
  # queue, and a manifest snapshot for the action-log metadata.
  def build_download_plan(profile:)
    recent_candidates = profile.instagram_profile_posts
      .with_attached_media
      .recent_first
      .limit(CAPTURE_FETCH_LIMIT)
      .select { |post| downloadable_profile_post?(post) }
      .first(DOWNLOAD_TARGET_RECENT_POSTS)

    already_downloaded_count = recent_candidates.count { |post| post.media.attached? }
    missing_posts = recent_candidates.reject { |post| post.media.attached? }
    # Only queue enough downloads to reach the recent-posts target.
    required = [DOWNLOAD_TARGET_RECENT_POSTS - already_downloaded_count, 0].max
    to_queue = missing_posts.first(required)

    manifest = recent_candidates.map do |post|
      metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
      {
        post_id: post.id,
        shortcode: post.shortcode,
        post_kind: metadata["post_kind"].to_s.presence || "post",
        product_type: metadata["product_type"].to_s.presence,
        repost: ActiveModel::Type::Boolean.new.cast(metadata["is_repost"]),
        media_type: metadata["media_type"],
        media_id: metadata["media_id"],
        # First available URL wins: explicit source, then video, then image.
        media_url: post.source_media_url.to_s.presence || metadata["media_url_video"].to_s.presence || metadata["media_url_image"].to_s.presence,
        taken_at: post.taken_at&.iso8601,
        downloaded: post.media.attached?
      }.compact
    end

    {
      recent_candidates: recent_candidates,
      already_downloaded_count: already_downloaded_count,
      missing_count: missing_posts.length,
      to_queue: to_queue,
      manifest: manifest
    }
  end

  # Queues DownloadInstagramProfilePostMediaJob for each downloadable post.
  # Per-post failures are collected (not raised) so one bad post does not
  # abort the rest of the batch. Returns { queued_count:, post_ids:, failures: }.
  def enqueue_profile_post_downloads!(account:, profile:, posts:)
    post_ids = []
    failures = []

    Array(posts).each do |post|
      next unless post
      next unless downloadable_profile_post?(post)

      mark_download_queued!(post: post)
      job = DownloadInstagramProfilePostMediaJob.perform_later(
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        instagram_profile_post_id: post.id,
        trigger_analysis: true
      )
      post_ids << post.id
      profile.record_event!(
        kind: "profile_post_media_download_queued",
        external_id: "profile_post_media_download_queued:#{post.id}:#{job.job_id}",
        occurred_at: Time.current,
        metadata: {
          source: self.class.name,
          instagram_profile_post_id: post.id,
          shortcode: post.shortcode,
          active_job_id: job.job_id
        }
      )
    rescue StandardError => e
      failures << {
        instagram_profile_post_id: post&.id,
        shortcode: post&.shortcode.to_s.presence,
        error_class: e.class.name,
        # Truncated to keep failure metadata bounded.
        error_message: e.message.to_s.byteslice(0, 220)
      }.compact
      next
    end

    {
      queued_count: post_ids.length,
      post_ids: post_ids,
      failures: failures
    }
  end

  # A post is downloadable when it is not flagged deleted at the source and
  # has at least one media URL (source URL, video URL, or image URL).
  def downloadable_profile_post?(post)
    return false unless post
    return false if ActiveModel::Type::Boolean.new.cast(post.metadata.is_a?(Hash) ? post.metadata["deleted_from_source"] : nil)

    source_url = post.source_media_url.to_s.strip
    return true if source_url.present?

    metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
    metadata["media_url_video"].to_s.strip.present? || metadata["media_url_image"].to_s.strip.present?
  end

  # Stamps download bookkeeping onto the post's metadata before enqueueing,
  # clearing any previous download_error.
  def mark_download_queued!(post:)
    metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
    post.update!(
      metadata: metadata.merge(
        "download_status" => "queued",
        "download_queued_at" => Time.current.utc.iso8601(3),
        "download_queued_by" => self.class.name,
        "download_error" => nil
      )
    )
  end

  # Shared metadata payload for captured/restored post events.
  def profile_post_event_metadata(post:, reason:)
    metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
    {
      source: "profile_posts_manual_capture",
      shortcode: post.shortcode,
      reason: reason.to_s,
      instagram_profile_post_id: post.id,
      permalink: post.permalink_url,
      likes_count: post.likes_count,
      comments_count: post.comments_count,
      media_type: metadata["media_type"],
      media_id: metadata["media_id"],
      deleted_from_source: false
    }
  end

  # Best-effort: mirrors the post's media blob onto the capture event.
  # Attachment failures are logged and swallowed — events are valid without media.
  def attach_post_media_to_event(event:, post:)
    return unless event
    return unless post.media.attached?
    return if event.media.attached?

    event.media.attach(post.media.blob)
  rescue StandardError => e
    Rails.logger.warn("[CaptureInstagramProfilePostsJob] unable to attach post media to event #{event&.id}: #{e.class}: #{e.message}")
  end
end
-
require "net/http"
-
require "json"
-
-
# Periodic probe of the local AI stack. Forces a fresh health check and
# records the result with Ops::IssueTracker; a crash during the probe is
# itself recorded as an unhealthy check before being re-raised.
class CheckAiMicroserviceHealthJob < ApplicationJob
  queue_as :sync

  def perform
    health = Ops::LocalAiHealth.check(force: true)
    healthy = ActiveModel::Type::Boolean.new.cast(health[:ok])
    summary = healthy ? "Local AI stack healthy" : "Local AI stack unhealthy"

    Ops::IssueTracker.record_ai_service_check!(
      ok: healthy,
      message: summary,
      metadata: health
    )
  rescue StandardError => e
    # Record the failure itself as an unhealthy check, then let ActiveJob
    # retry/failure handling see the original error.
    Ops::IssueTracker.record_ai_service_check!(
      ok: false,
      message: "AI microservice health check failed: #{e.message}",
      metadata: { error_class: e.class.name }
    )
    raise
  end
end
-
# Thin scheduled wrapper: runs the queue health check on the :sync queue.
# All logic lives in Ops::QueueHealth; any error it raises propagates to
# ActiveJob's normal retry/failure handling.
class CheckQueueHealthJob < ApplicationJob
  queue_as :sync

  def perform
    Ops::QueueHealth.check!
  end
end
-
# Mixin for scheduler jobs that walk InstagramAccount rows in id-ordered
# batches, optionally re-enqueueing themselves to continue where a batch
# left off.
module ScheduledAccountBatching
  extend ActiveSupport::Concern

  # Hard cap on accounts loaded per batch, regardless of caller input.
  MAX_ACCOUNT_BATCH_SIZE = 200
  # Hard cap (seconds) on the delay before a continuation job runs.
  MAX_CONTINUATION_WAIT_SECONDS = 300

  private

  # Merges scheduler parameters with precedence kwargs > opts > defaults.
  # Non-Hash opts are ignored.
  def normalize_scheduler_params(opts, kwargs, defaults)
    overrides = opts.is_a?(Hash) ? opts.symbolize_keys : {}
    defaults.merge(overrides).merge(kwargs.symbolize_keys)
  end

  # Loads one id-ordered page of accounts after +cursor_id+.
  # Returns { accounts:, batch_size:, next_cursor_id:, has_more: } where
  # has_more reflects whether rows exist beyond the last loaded id.
  def load_account_batch(scope:, cursor_id:, batch_size:)
    table = InstagramAccount.arel_table
    per_page = batch_size.to_i.clamp(1, MAX_ACCOUNT_BATCH_SIZE)

    batch_scope = scope.reorder(table[:id].asc)
    batch_scope = batch_scope.where(table[:id].gt(cursor_id.to_i)) if cursor_id.to_i.positive?

    accounts = batch_scope.limit(per_page).to_a
    last_id = accounts.last&.id
    more = last_id.present? && scope.where(table[:id].gt(last_id.to_i)).exists?

    {
      accounts: accounts,
      batch_size: per_page,
      next_cursor_id: last_id,
      has_more: more
    }
  end

  # Re-enqueues the current job class with +payload+ as keyword arguments
  # after a capped delay. Returns nil when the compacted payload is empty.
  def schedule_account_batch_continuation!(wait_seconds:, payload:)
    args = payload.is_a?(Hash) ? payload.compact : {}
    return nil if args.empty?

    wait = wait_seconds.to_i.clamp(0, MAX_CONTINUATION_WAIT_SECONDS)
    return self.class.perform_later(**args) unless wait.positive?

    self.class.set(wait: wait.seconds).perform_later(**args)
  end
end
-
require "net/http"
-
require "digest"
-
-
# Downloads the media file for an InstagramPost and attaches it via
# ActiveStorage. Tries, in order: keep a valid existing attachment, reuse a
# blob cached on another record with the same shortcode, then fetch the
# remote URL with size/signature validation.
class DownloadInstagramPostMediaJob < ApplicationJob
  queue_as :post_downloads

  # Payload size limits: 6 MiB for images, 80 MiB for videos.
  MAX_IMAGE_BYTES = 6 * 1024 * 1024
  MAX_VIDEO_BYTES = 80 * 1024 * 1024

  # On any failure the post is scheduled for purge in 6 hours and the error
  # re-raised so ActiveJob retry/failure handling applies.
  def perform(instagram_post_id:)
    post = InstagramPost.find(instagram_post_id)
    if post.media.attached?
      # Existing attachment is kept only if the blob passes integrity checks;
      # a corrupt/missing file falls through to re-download.
      integrity = blob_integrity_for(post.media.blob)
      return if integrity[:valid]
    end

    url = post.media_url.to_s.strip
    return if url.blank?

    # Cheapest path: copy a blob already downloaded for the same shortcode.
    return if attach_media_from_local_cache!(post: post)

    io, content_type, filename = download(url)
    blob = ActiveStorage::Blob.create_and_upload!(
      io: io,
      filename: filename,
      content_type: content_type,
      identify: false
    )
    attach_blob_to_post!(post: post, blob: blob)
    post.update!(media_downloaded_at: Time.current)
  rescue StandardError
    post&.update!(purge_at: 6.hours.from_now) if post
    raise
  ensure
    # io is nil unless the remote download path ran; close best-effort.
    begin
      io&.close
    rescue StandardError
      nil
    end
  end

  private

  # Fetches +url+ and returns [io, content_type, filename] after validating
  # size, non-HTML payload, and magic-byte signature. Raises on any failure.
  def download(url)
    uri = URI.parse(url)
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = (uri.scheme == "https")
    http.open_timeout = 10
    http.read_timeout = 30

    req = Net::HTTP::Get.new(uri.request_uri)
    req["Accept"] = "*/*"
    req["User-Agent"] = "Mozilla/5.0"
    res = http.request(req)
    raise "media download failed: HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess)

    body = res.body.to_s
    content_type = res["content-type"].to_s.split(";").first.presence || "application/octet-stream"
    # Videos get the larger cap; everything else is treated as an image.
    limit = content_type.start_with?("video/") ? MAX_VIDEO_BYTES : MAX_IMAGE_BYTES
    raise "empty media payload" if body.bytesize <= 0
    raise "media too large" if body.bytesize > limit
    # CDNs sometimes return an HTML error/login page with HTTP 200.
    raise "unexpected html payload" if html_payload?(body)
    validate_known_signature!(body: body, content_type: content_type)

    ext = extension_for_content_type(content_type)
    io = StringIO.new(body)
    io.set_encoding(Encoding::BINARY) if io.respond_to?(:set_encoding)
    # Deterministic filename derived from the URL hash.
    [io, content_type, "post_#{Digest::SHA256.hexdigest(url)[0, 12]}.#{ext}"]
  end

  # Verifies a blob is present, non-empty, and (for disk-backed services)
  # that the on-disk file exists and matches the recorded byte size.
  # Returns { valid:, reason: }; any internal error counts as invalid.
  def blob_integrity_for(blob)
    return { valid: false, reason: "missing_blob" } unless blob
    return { valid: false, reason: "non_positive_byte_size" } if blob.byte_size.to_i <= 0

    service = blob.service
    # path_for is private on the Disk service; second arg includes privates.
    if service.respond_to?(:path_for, true)
      path = service.send(:path_for, blob.key)
      return { valid: false, reason: "missing_file_on_disk" } unless path && File.exist?(path)

      file_size = File.size(path)
      return { valid: false, reason: "zero_byte_file" } if file_size <= 0
      return { valid: false, reason: "byte_size_mismatch" } if blob.byte_size.to_i.positive? && file_size != blob.byte_size.to_i
    end

    { valid: true, reason: nil }
  rescue StandardError
    { valid: false, reason: "integrity_check_error" }
  end

  # Heuristic: treats the payload as HTML if its first 4 KiB contains an
  # <html tag or starts with an HTML doctype.
  def html_payload?(body)
    sample = body.to_s.byteslice(0, 4096).to_s.downcase
    sample.include?("<html") || sample.start_with?("<!doctype html")
  end

  # Checks magic bytes against the declared content type for known formats.
  # Unknown/blank/octet-stream types are accepted without a signature check.
  def validate_known_signature!(body:, content_type:)
    type = content_type.to_s.downcase
    return if type.blank?
    return if type.include?("octet-stream")

    case
    when type.include?("jpeg")
      raise "invalid jpeg signature" unless body.start_with?("\xFF\xD8".b)
    when type.include?("png")
      raise "invalid png signature" unless body.start_with?("\x89PNG\r\n\x1A\n".b)
    when type.include?("gif")
      raise "invalid gif signature" unless body.start_with?("GIF87a".b) || body.start_with?("GIF89a".b)
    when type.include?("webp")
      raise "invalid webp signature" unless body.bytesize >= 12 && body.byteslice(0, 4) == "RIFF" && body.byteslice(8, 4) == "WEBP"
    when type.start_with?("video/")
      # ISO BMFF (mp4/mov) carries "ftyp" at offset 4.
      raise "invalid video signature" unless body.bytesize >= 12 && body.byteslice(4, 4) == "ftyp"
    end
  end

  # Attaches +blob+ to the post. When an attachment row already exists its
  # blob is swapped in place rather than destroying the row — presumably to
  # preserve attachment-id references elsewhere; confirm against the
  # ActiveStorageIngestion bookkeeping used by the avatar job.
  def attach_blob_to_post!(post:, blob:)
    raise "missing blob for attach" unless blob

    if post.media.attached? && post.media.attachment.present?
      attachment = post.media.attachment
      attachment.update!(blob: blob) if attachment.blob_id != blob.id
      return
    end

    post.media.attach(blob)
  end

  # Maps a content type to a filename extension; unknown types get "bin".
  def extension_for_content_type(content_type)
    return "jpg" if content_type.include?("jpeg")
    return "png" if content_type.include?("png")
    return "webp" if content_type.include?("webp")
    return "gif" if content_type.include?("gif")
    return "mp4" if content_type.include?("mp4")
    return "mov" if content_type.include?("quicktime")

    "bin"
  end

  # Best-effort reuse of an already-downloaded blob for the same shortcode.
  # Returns true when a cached blob was attached; failures log and return false.
  def attach_media_from_local_cache!(post:)
    blob = cached_media_blob_for(post: post)
    return false unless blob

    attach_blob_to_post!(post: post, blob: blob)
    post.update!(media_downloaded_at: Time.current)
    true
  rescue StandardError => e
    Rails.logger.warn("[DownloadInstagramPostMediaJob] local media cache attach failed post_id=#{post.id}: #{e.class}: #{e.message}")
    false
  end

  # Looks for a valid blob on another InstagramPost or an
  # InstagramProfilePost sharing this post's shortcode; returns nil if none
  # passes the integrity check.
  def cached_media_blob_for(post:)
    shortcode = post.shortcode.to_s.strip
    return nil if shortcode.blank?

    cached_feed_post = InstagramPost
      .joins(:media_attachment)
      .where(shortcode: shortcode)
      .where.not(id: post.id)
      .order(media_downloaded_at: :desc, id: :desc)
      .first
    if cached_feed_post&.media&.attached?
      blob = cached_feed_post.media.blob
      return blob if blob_integrity_for(blob)[:valid]
    end

    cached_profile_post = InstagramProfilePost
      .joins(:media_attachment)
      .where(shortcode: shortcode)
      .order(updated_at: :desc, id: :desc)
      .first
    if cached_profile_post&.media&.attached?
      blob = cached_profile_post.media.blob
      return blob if blob_integrity_for(blob)[:valid]
    end

    nil
  end
end
-
require "net/http"
-
require "digest"
-
require "cgi"
-
require "uri"
-
-
# Downloads and attaches a profile's avatar image, skipping work when the
# avatar is unchanged (detected via a host+path URL fingerprint) and
# recording avatar_changed / avatar_synced events on the profile.
# Progress is tracked on an InstagramProfileActionLog row.
class DownloadInstagramProfileAvatarJob < ApplicationJob
  queue_as :avatars

  # @param broadcast [Boolean] emit Turbo Stream notifications on success/failure
  # @param force [Boolean] re-download even when the fingerprint is unchanged
  # @param profile_action_log_id [Integer, nil] reuse an existing action log row
  def perform(instagram_account_id:, instagram_profile_id:, broadcast: true, force: false, profile_action_log_id: nil)
    account = InstagramAccount.find(instagram_account_id)
    profile = account.instagram_profiles.find(instagram_profile_id)
    action_log = find_or_create_action_log(
      account: account,
      profile: profile,
      action: "sync_avatar",
      profile_action_log_id: profile_action_log_id
    )
    action_log.mark_running!(extra_metadata: { queue_name: queue_name, active_job_id: job_id, force: force })

    raw_url = profile.profile_pic_url.to_s
    # Normalizer rejects invalid/placeholder URLs by returning blank.
    url = Instagram::AvatarUrlNormalizer.normalize(raw_url)
    if url.blank?
      # Nothing to download; leave the attachment blank and allow UI default avatar fallback.
      profile.update!(
        # Clear a known-bad stored URL; keep it untouched when it was already blank.
        profile_pic_url: (raw_url.present? ? nil : profile.profile_pic_url),
        avatar_url_fingerprint: nil,
        avatar_synced_at: Time.current
      )
      action_log.mark_succeeded!(
        extra_metadata: {
          skipped: true,
          reason: raw_url.present? ? "invalid_or_placeholder_avatar_url" : "avatar_url_blank",
          profile_pic_url_raw: raw_url.presence
        },
        log_text: raw_url.present? ? "Avatar URL invalid/placeholder; skipped download" : "Avatar URL blank; marked as synced with no attachment"
      )
      return
    end

    fp = url_fingerprint(url)

    # Skip if we already have the latest avatar attached.
    if profile.avatar.attached? && !force && profile.avatar_url_fingerprint.to_s == fp
      action_log.mark_succeeded!(log_text: "Avatar unchanged; skipped download", extra_metadata: { skipped: true })
      return
    end

    io, filename, content_type = fetch_url(url, user_agent: account.user_agent)

    attach_avatar!(
      profile: profile,
      io: io,
      filename: filename,
      content_type: content_type
    )

    # Changed = a previous fingerprint existed and differs from the new one.
    avatar_changed = profile.avatar_url_fingerprint.present? && profile.avatar_url_fingerprint != fp
    profile.update!(avatar_url_fingerprint: fp, avatar_synced_at: Time.current)

    if avatar_changed
      event = profile.record_event!(
        kind: "avatar_changed",
        external_id: fp,
        occurred_at: nil,
        metadata: { profile_pic_url: url }
      )
      # Best-effort: mirror the new avatar blob onto the event.
      begin
        event.media.attach(profile.avatar.blob) if profile.avatar.attached?
      rescue StandardError
        nil
      end
    else
      profile.record_event!(
        kind: "avatar_synced",
        external_id: fp,
        occurred_at: nil,
        metadata: { profile_pic_url: url }
      )
    end

    if broadcast
      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: "notice", message: "Downloaded avatar for #{profile.username}." }
      )
    end
    action_log.mark_succeeded!(
      extra_metadata: { fingerprint: fp, avatar_changed: avatar_changed, profile_pic_url: url },
      log_text: "Avatar sync complete"
    )
  rescue StandardError => e
    if broadcast
      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: "alert", message: "Avatar download failed: #{e.message}" }
      )
    end
    action_log&.mark_failed!(error_message: e.message, extra_metadata: { active_job_id: job_id })
    raise
  end

  private

  # Attaches a fresh avatar. For an existing attachment row the blob is
  # replaced in place rather than re-attaching.
  def attach_avatar!(profile:, io:, filename:, content_type:)
    attachment = profile.avatar_attachment

    unless attachment.present?
      profile.avatar.attach(
        io: io,
        filename: filename,
        content_type: content_type
      )
      return
    end

    # Avoid destroying the attachment row because ActiveStorageIngestion keeps
    # a foreign-key reference to attachment ids for storage observability.
    new_blob = ActiveStorage::Blob.create_and_upload!(
      io: io,
      filename: filename,
      content_type: content_type
    )
    attachment.update!(blob: new_blob)
  end

  # Reuses the caller-provided action-log row when it exists, otherwise
  # creates a fresh "queued" log for this run.
  def find_or_create_action_log(account:, profile:, action:, profile_action_log_id:)
    log = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id) if profile_action_log_id.present?
    return log if log

    profile.instagram_profile_action_logs.create!(
      instagram_account: account,
      action: action,
      status: "queued",
      trigger_source: "job",
      occurred_at: Time.current,
      active_job_id: job_id,
      queue_name: queue_name,
      metadata: { created_by: self.class.name }
    )
  end

  # SHA-256 over host+path; falls back to hashing the raw string when the
  # URL cannot be parsed.
  def url_fingerprint(url)
    uri = URI.parse(url)
    # Instagram CDN URLs often rotate query params; host+path is the stable signal for "same image".
    base = "#{uri.host}#{uri.path}"
    Digest::SHA256.hexdigest(base)
  rescue StandardError
    Digest::SHA256.hexdigest(url.to_s)
  end

  # Fetches +url+ following up to 4 redirects. Returns [io, filename,
  # content_type]; raises on non-success responses, empty bodies, bad URLs,
  # or redirect exhaustion.
  def fetch_url(url, user_agent:, redirects_left: 4)
    raise "Too many redirects" if redirects_left.negative?

    uri = URI.parse(url)
    raise "Invalid URL" unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)

    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = (uri.scheme == "https")
    http.open_timeout = 10
    http.read_timeout = 20

    req = Net::HTTP::Get.new(uri.request_uri)
    req["User-Agent"] = user_agent.presence || "Mozilla/5.0"
    req["Accept"] = "image/avif,image/webp,image/apng,image/*,*/*;q=0.8"
    req["Referer"] = "https://www.instagram.com/"

    res = http.request(req)

    # Handle simple redirects (CDN often redirects).
    if res.is_a?(Net::HTTPRedirection) && res["location"].present?
      redirected_url = normalize_redirect_url(base_uri: uri, location: res["location"])
      raise "Invalid redirect URL" if redirected_url.blank?

      return fetch_url(redirected_url, user_agent: user_agent, redirects_left: redirects_left - 1)
    end

    raise "HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess)

    body = res.body
    raise "Empty response body" if body.blank?

    filename = File.basename(uri.path.presence || "avatar.jpg")
    filename = "avatar.jpg" if filename.blank? || filename == "/"
    content_type = res["content-type"].to_s.split(";").first.presence || "image/jpeg"

    io = StringIO.new(body)
    [io, filename, content_type]
  end

  # Resolves a (possibly relative) redirect Location against the request URI;
  # returns nil for anything that is not an http(s) URL.
  def normalize_redirect_url(base_uri:, location:)
    target = URI.join(base_uri.to_s, location.to_s).to_s
    uri = URI.parse(target)
    return nil unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)

    uri.to_s
  rescue URI::InvalidURIError, ArgumentError
    nil
  end
end
-
require "net/http"
-
require "digest"
-
require "stringio"
-
-
class DownloadInstagramProfilePostMediaJob < ApplicationJob
-
queue_as :post_downloads
-
-
MAX_IMAGE_BYTES = 6 * 1024 * 1024
-
MAX_VIDEO_BYTES = 80 * 1024 * 1024
-
MAX_PREVIEW_IMAGE_BYTES = 3 * 1024 * 1024
-
PROFILE_POST_PREVIEW_ENQUEUE_TTL_SECONDS = 30.minutes
-
-
retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 4
-
retry_on Errno::ECONNRESET, Errno::ECONNREFUSED, wait: :polynomially_longer, attempts: 4
-
retry_on Timeout::Error, wait: :polynomially_longer, attempts: 3
-
-
# Ensures one profile post has its media downloaded, then (optionally)
# queues the follow-up analysis job. Download and analysis enqueue run
# under the post's row lock so concurrent downloads for the same post
# serialize. Outcome is mirrored to structured logs; failures mark the
# post's download state and re-raise for ActiveJob retry handling.
#
# @param trigger_analysis [Boolean-ish] cast via ActiveModel::Type::Boolean;
#   analysis is queued only when the download ends downloaded/already_downloaded.
def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, trigger_analysis: true)
  account = InstagramAccount.find(instagram_account_id)
  profile = account.instagram_profiles.find(instagram_profile_id)
  post = profile.instagram_profile_posts.find(instagram_profile_post_id)
  trigger_analysis_bool = ActiveModel::Type::Boolean.new.cast(trigger_analysis)

  analysis_state = { queued: false, reason: "analysis_trigger_disabled" }
  download_state = nil
  post.with_lock do
    # download_state carries at least { status:, source: } — see
    # ensure_media_downloaded!.
    download_state = ensure_media_downloaded!(profile: profile, post: post)
    should_enqueue_analysis =
      trigger_analysis_bool &&
      %w[downloaded already_downloaded].include?(download_state[:status].to_s)
    if should_enqueue_analysis
      analysis_state = enqueue_analysis_if_allowed!(account: account, profile: profile, post: post)
    elsif trigger_analysis_bool
      analysis_state = { queued: false, reason: "download_not_completed" }
    end
  end

  Ops::StructuredLogger.info(
    event: "profile_post_media_download.completed",
    payload: {
      active_job_id: job_id,
      instagram_account_id: account.id,
      instagram_profile_id: profile.id,
      instagram_profile_post_id: post.id,
      shortcode: post.shortcode,
      download_status: download_state[:status],
      download_source: download_state[:source],
      analysis_queued: analysis_state[:queued],
      analysis_reason: analysis_state[:reason],
      analysis_job_id: analysis_state[:job_id]
    }
  )
rescue StandardError => e
  # defined?(post) guards against failures in the initial lookups.
  mark_download_failed!(post: post, error: e) if defined?(post) && post
  Ops::StructuredLogger.error(
    event: "profile_post_media_download.failed",
    payload: {
      active_job_id: job_id,
      # Fall back to raw ids when the records never loaded.
      instagram_account_id: account&.id || instagram_account_id,
      instagram_profile_id: profile&.id || instagram_profile_id,
      instagram_profile_post_id: post&.id || instagram_profile_post_id,
      error_class: e.class.name,
      error_message: e.message.to_s
    }
  )
  raise
end
-
-
private

# Ensures the post has valid media attached, trying in order:
# (1) already-attached blob that passes an integrity check,
# (2) a blob cached locally on another record for the same media,
# (3) a fresh remote download.
# Returns a { status:, source: } hash; status is "skipped" when there is
# nothing to download (deleted at source, or no known media URL).
def ensure_media_downloaded!(profile:, post:)
  return mark_download_skipped!(profile: profile, post: post, reason: "deleted_from_source") if post_deleted?(post)

  media_url = resolve_media_url(post)
  return mark_download_skipped!(profile: profile, post: post, reason: "missing_media_url") if media_url.blank?

  attached_and_valid = false
  if post.media.attached?
    integrity = blob_integrity_for(post.media.blob)
    if integrity[:valid]
      attached_and_valid = true
    else
      # Record why the existing blob is unusable; the flow below replaces it.
      mark_corrupt_media_detected!(post: post, reason: integrity[:reason])
    end
  end

  if attached_and_valid
    ensure_preview_image_for_video!(post: post, media_url: media_url)
    record_download_success!(profile: profile, post: post, source: "already_attached", media_url: media_url)
    return { status: "already_downloaded", source: "already_attached" }
  end

  # Reuse a blob already downloaded for the same media elsewhere in the DB.
  if attach_media_from_local_cache!(post: post)
    ensure_preview_image_for_video!(post: post, media_url: media_url)
    record_download_success!(profile: profile, post: post, source: "local_cache", media_url: media_url)
    return { status: "downloaded", source: "local_cache" }
  end

  io = nil
  begin
    io, content_type, filename = download_media(media_url)
    blob = ActiveStorage::Blob.create_and_upload!(
      io: io,
      filename: filename,
      content_type: content_type,
      identify: false
    )
    attach_blob_to_post!(post: post, blob: blob)
    # Reuse the in-memory bytes for the preview frame when io is a StringIO.
    downloaded_bytes = io.respond_to?(:string) ? io.string.to_s : nil
    ensure_preview_image_for_video!(post: post, media_url: media_url, video_bytes: downloaded_bytes, content_type: content_type)
    post.update!(
      media_url_fingerprint: Digest::SHA256.hexdigest(media_url),
      metadata: merged_metadata(post: post).merge(
        "download_status" => "downloaded",
        "download_source" => "remote",
        "downloaded_at" => Time.current.utc.iso8601(3),
        "download_error" => nil
      )
    )
    record_download_success!(profile: profile, post: post, source: "remote", media_url: media_url)
    { status: "downloaded", source: "remote" }
  ensure
    io&.close if io.respond_to?(:close)
  end
end
-
-
# Enqueues AnalyzeInstagramProfilePostJob for the post unless the scan
# policy blocks it, it is already analyzed, or an identical analysis (same
# media fingerprint) is already pending. Returns a { queued:, reason:, ... }
# hash and never raises — enqueue failures are logged and reported in the
# result instead.
def enqueue_analysis_if_allowed!(account:, profile:, post:)
  policy_decision = Instagram::ProfileScanPolicy.new(profile: profile).decision
  if policy_decision[:skip_post_analysis]
    Instagram::ProfileScanPolicy.mark_post_analysis_skipped!(post: post, decision: policy_decision)
    return {
      queued: false,
      reason: "policy_blocked",
      skip_reason_code: policy_decision[:reason_code].to_s
    }
  end

  return { queued: false, reason: "already_analyzed" } if post.ai_status.to_s == "analyzed" && post.analyzed_at.present?

  # Dedupe: skip when an analysis for this exact media is already pending.
  fingerprint = analysis_enqueue_fingerprint(post)
  metadata = merged_metadata(post: post)
  if post.ai_status.to_s == "pending" && metadata["analysis_enqueued_fingerprint"].to_s == fingerprint
    return { queued: false, reason: "already_queued_for_current_media" }
  end

  job = AnalyzeInstagramProfilePostJob.perform_later(
    instagram_account_id: account.id,
    instagram_profile_id: profile.id,
    instagram_profile_post_id: post.id,
    task_flags: {
      generate_comments: false,
      enforce_comment_evidence_policy: false,
      retry_on_incomplete_profile: false
    }
  )
  # Stamp the fingerprint + job id so the dedupe check above can recognize
  # this enqueue on subsequent runs.
  post.update!(
    ai_status: "pending",
    analyzed_at: nil,
    metadata: metadata.merge(
      "analysis_enqueued_at" => Time.current.utc.iso8601(3),
      "analysis_enqueued_by" => self.class.name,
      "analysis_enqueued_fingerprint" => fingerprint,
      "analysis_job_id" => job.job_id
    )
  )
  profile.record_event!(
    kind: "profile_post_analysis_queued",
    external_id: "profile_post_analysis_queued:#{post.id}:#{fingerprint}",
    occurred_at: Time.current,
    metadata: {
      source: self.class.name,
      instagram_profile_post_id: post.id,
      shortcode: post.shortcode,
      analysis_job_id: job.job_id
    }
  )

  { queued: true, reason: "queued", job_id: job.job_id }
rescue StandardError => e
  Rails.logger.warn(
    "[DownloadInstagramProfilePostMediaJob] analysis queue failed for post_id=#{post.id}: #{e.class}: #{e.message}"
  )
  { queued: false, reason: "analysis_enqueue_failed", error_class: e.class.name, error_message: e.message.to_s }
end
-
-
# Stamps the post's metadata as successfully downloaded and records a
# profile-level event describing where the media came from.
def record_download_success!(profile:, post:, source:, media_url:)
  timestamp = Time.current
  source_label = source.to_s

  updated_metadata = merged_metadata(post: post).merge(
    "download_status" => "downloaded",
    "download_source" => source_label,
    "downloaded_at" => timestamp.utc.iso8601(3),
    "download_error" => nil
  )
  post.update!(metadata: updated_metadata)

  fingerprint = analysis_enqueue_fingerprint(post)
  profile.record_event!(
    kind: "profile_post_media_downloaded",
    external_id: "profile_post_media_downloaded:#{post.id}:#{fingerprint}",
    occurred_at: timestamp,
    metadata: {
      source: self.class.name,
      instagram_profile_post_id: post.id,
      shortcode: post.shortcode,
      media_url: media_url,
      download_source: source_label,
      media_attached: post.media.attached?
    }
  )
end
-
-
# Marks the post as skipped (nothing to download) with the given reason and
# records a matching profile event. Returns the { status:, source: } result
# the caller reports upward.
def mark_download_skipped!(profile:, post:, reason:)
  reason_label = reason.to_s

  skipped_metadata = merged_metadata(post: post).merge(
    "download_status" => "skipped",
    "download_skip_reason" => reason_label,
    "download_error" => nil,
    "downloaded_at" => nil
  )
  post.update!(metadata: skipped_metadata)

  profile.record_event!(
    kind: "profile_post_media_download_skipped",
    external_id: "profile_post_media_download_skipped:#{post.id}:#{reason}",
    occurred_at: Time.current,
    metadata: {
      source: self.class.name,
      instagram_profile_post_id: post.id,
      shortcode: post.shortcode,
      reason: reason_label
    }
  )

  { status: "skipped", source: reason_label }
end
-
-
# Best-effort "failed" stamp on the post's metadata. Swallows persistence
# errors so the original exception (re-raised by the caller) is not masked.
def mark_download_failed!(post:, error:)
  failure_metadata = merged_metadata(post: post).merge(
    "download_status" => "failed",
    "download_error" => "#{error.class}: #{error.message}",
    "downloaded_at" => nil
  )
  post.update!(metadata: failure_metadata)
rescue StandardError
  nil
end
-
-
# Best-effort stamp that the attached blob failed its integrity check; the
# reason is preserved so the subsequent re-download can be traced.
def mark_corrupt_media_detected!(post:, reason:)
  corrupt_metadata = merged_metadata(post: post).merge(
    "download_status" => "corrupt_detected",
    "download_error" => "integrity_check_failed: #{reason}",
    "download_corrupt_detected_at" => Time.current.utc.iso8601(3)
  )
  post.update!(metadata: corrupt_metadata)
rescue StandardError
  nil
end
-
-
# True when scraped metadata says the post no longer exists at the source.
def post_deleted?(post)
  deleted_flag = merged_metadata(post: post)["deleted_from_source"]
  ActiveModel::Type::Boolean.new.cast(deleted_flag)
end
-
-
# Best media URL for the post: the canonical source URL when present,
# otherwise the scraped video URL, otherwise the scraped image URL.
def resolve_media_url(post)
  canonical = post.source_media_url.to_s.strip
  return canonical unless canonical.empty?

  scraped = merged_metadata(post: post)
  scraped["media_url_video"].to_s.strip.presence || scraped["media_url_image"].to_s.strip.presence
end
-
-
# Defensive deep copy of the post's metadata hash ({} when absent or not a
# Hash) so callers can merge keys without mutating the persisted value.
def merged_metadata(post:)
  raw = post.metadata
  return {} unless raw.is_a?(Hash)

  raw.deep_dup
end
-
-
# Stable identity for "which media would be analyzed", preferring the
# strongest signal available: blob checksum, then the stored URL
# fingerprint, then a hash of the resolved URL, then the bare post id.
def analysis_enqueue_fingerprint(post)
  if post.media.attached? && post.media.blob&.checksum.to_s.present?
    return "blob:#{post.media.blob.checksum}"
  end

  if post.media_url_fingerprint.to_s.present?
    return "fp:#{post.media_url_fingerprint}"
  end

  url = resolve_media_url(post)
  url.present? ? "url:#{Digest::SHA256.hexdigest(url)}" : "post:#{post.id}"
end
-
-
# Attaches a cached blob (stored for another record with the same media) to
# the post and refreshes its URL fingerprint. Returns true on success,
# false when no cached blob exists or the attach failed (logged, never
# raised — callers fall back to a remote download).
def attach_media_from_local_cache!(post:)
  blob = cached_media_blob_for(post: post)
  return false unless blob

  source_url = resolve_media_url(post)
  # Prefer a fingerprint of the current URL; keep the stored one otherwise.
  fingerprint = source_url.present? ? Digest::SHA256.hexdigest(source_url) : post.media_url_fingerprint.to_s.presence
  attach_blob_to_post!(post: post, blob: blob)
  post.update!(
    media_url_fingerprint: fingerprint
  )
  attach_preview_from_local_cache!(post: post)
  true
rescue StandardError => e
  Rails.logger.warn("[DownloadInstagramProfilePostMediaJob] local media cache attach failed post_id=#{post.id}: #{e.class}: #{e.message}")
  false
end
-
-
# Finds an integrity-valid blob already stored for the same media — first
# by scraped media_id across other profile posts, then by shortcode across
# profile posts and the home-feed posts table. Returns nil when none pass
# the integrity check.
def cached_media_blob_for(post:)
  metadata = merged_metadata(post: post)
  media_id = metadata["media_id"].to_s.strip
  shortcode = post.shortcode.to_s.strip

  if media_id.present?
    # Strongest match: identical scraped media_id on another post row.
    by_media_id = InstagramProfilePost
      .joins(:media_attachment)
      .where.not(id: post.id)
      .where("metadata ->> 'media_id' = ?", media_id)
      .order(updated_at: :desc, id: :desc)
      .first
    if by_media_id&.media&.attached? && blob_integrity_for(by_media_id.media.blob)[:valid]
      return by_media_id.media.blob
    end
  end

  if shortcode.present?
    # Fall back to shortcode matches, newest first, skipping invalid blobs.
    by_shortcode_profile = InstagramProfilePost
      .joins(:media_attachment)
      .where.not(id: post.id)
      .where(shortcode: shortcode)
      .order(updated_at: :desc, id: :desc)
    by_shortcode_profile.each do |candidate|
      next unless candidate&.media&.attached?

      blob = candidate.media.blob
      return blob if blob_integrity_for(blob)[:valid]
    end

    # Also consider media saved from the home feed for the same shortcode.
    by_shortcode_feed = InstagramPost
      .joins(:media_attachment)
      .where(shortcode: shortcode)
      .order(media_downloaded_at: :desc, id: :desc)
    by_shortcode_feed.each do |candidate|
      next unless candidate&.media&.attached?

      blob = candidate.media.blob
      return blob if blob_integrity_for(blob)[:valid]
    end
  end

  nil
end
-
-
# Reuses a preview image already stored on another profile post with the
# same media_id or shortcode. Returns true when a preview was attached,
# false otherwise (including on any error — best-effort only).
def attach_preview_from_local_cache!(post:)
  return false if post.preview_image.attached?

  metadata = merged_metadata(post: post)
  media_id = metadata["media_id"].to_s.strip
  shortcode = post.shortcode.to_s.strip

  if media_id.present?
    by_media_id = InstagramProfilePost
      .joins(:preview_image_attachment)
      .where.not(id: post.id)
      .where("metadata ->> 'media_id' = ?", media_id)
      .order(updated_at: :desc, id: :desc)
      .first
    if by_media_id&.preview_image&.attached?
      attach_preview_blob_to_post!(post: post, blob: by_media_id.preview_image.blob)
      return true
    end
  end

  return false if shortcode.blank?

  by_shortcode = InstagramProfilePost
    .joins(:preview_image_attachment)
    .where.not(id: post.id)
    .where(shortcode: shortcode)
    .order(updated_at: :desc, id: :desc)
    .first
  if by_shortcode&.preview_image&.attached?
    attach_preview_blob_to_post!(post: post, blob: by_shortcode.preview_image.blob)
    return true
  end

  false
rescue StandardError
  false
end
-
-
# Ensures a video post has a still preview image, trying in order:
# local cache reuse, a scraped poster/image URL, and finally ffmpeg
# first-frame extraction from the video bytes. When nothing succeeds, a
# background preview-generation job is enqueued instead. No-ops for
# non-video media or when a preview already exists.
#
# @param video_bytes [String, nil] in-memory video payload when the caller
#   just downloaded it (avoids re-downloading the blob)
# @return [Boolean] true when a preview image is attached by the end
def ensure_preview_image_for_video!(post:, media_url:, video_bytes: nil, content_type: nil)
  return false unless post.media.attached?
  return false unless post.media.blob&.content_type.to_s.start_with?("video/")
  return true if post.preview_image.attached?

  metadata = merged_metadata(post: post)
  if attach_preview_from_local_cache!(post: post)
    stamp_preview_metadata!(post: post, source: "local_cache")
    return true
  end

  poster_url = preferred_preview_image_url(post: post, media_url: media_url, metadata: metadata)
  if poster_url.present?
    downloaded = download_preview_image(poster_url)
    if downloaded
      attach_preview_image_bytes!(
        post: post,
        image_bytes: downloaded[:bytes],
        content_type: downloaded[:content_type],
        filename: downloaded[:filename]
      )
      stamp_preview_metadata!(post: post, source: "remote_image_url")
      return true
    end
  end

  # Last resort: extract the first frame from the video itself. Only load
  # the blob from storage when it is within the size budget.
  bytes = video_bytes.to_s.b
  if bytes.blank? && post.media.attached? && post.media.blob.byte_size.to_i <= MAX_VIDEO_BYTES
    bytes = post.media.blob.download.to_s.b
  end
  if bytes.blank?
    enqueue_background_preview_generation!(post: post, reason: "video_bytes_missing")
    return false
  end

  extracted = VideoThumbnailService.new.extract_first_frame(
    video_bytes: bytes,
    reference_id: "profile_post_#{post.id}",
    content_type: content_type || post.media.blob.content_type
  )
  unless extracted[:ok]
    enqueue_background_preview_generation!(post: post, reason: extracted.dig(:metadata, :reason).to_s.presence || "ffmpeg_extract_failed")
    return false
  end

  attach_preview_image_bytes!(
    post: post,
    image_bytes: extracted[:image_bytes],
    content_type: extracted[:content_type],
    filename: extracted[:filename]
  )
  stamp_preview_metadata!(post: post, source: "ffmpeg_first_frame")
  true
rescue StandardError => e
  # Never fail the download job over a preview; defer to the background job.
  Rails.logger.warn("[DownloadInstagramProfilePostMediaJob] preview attach failed post_id=#{post.id}: #{e.class}: #{e.message}")
  enqueue_background_preview_generation!(post: post, reason: "#{e.class}: #{e.message}")
  false
end
-
-
# Picks the best still-image URL to use as the video's preview image.
#
# Scraped metadata image URLs are preferred; the post's source URL and the
# resolved media URL are appended as fallbacks, but only when the candidate
# URL does not itself look like a video file — the preview must be an image.
#
# BUG FIX: previously `media_url` was filtered by whether *source_media*
# looked like a video (`source_looks_video`), so a video media_url could be
# offered as an image candidate and an image media_url could be wrongly
# excluded. Each fallback URL is now checked against its own extension.
#
# @return [String, nil] first non-blank candidate, or nil when none
def preferred_preview_image_url(post:, media_url:, metadata:)
  candidates = [
    metadata["preview_image_url"],
    metadata["poster_url"],
    metadata["image_url"],
    metadata["media_url_image"],
    metadata["media_url"]
  ]

  video_extension = /\.(mp4|mov|webm)(\?|$)/

  source_media = post.source_media_url.to_s.strip
  candidates << source_media if source_media.present? && !source_media.downcase.match?(video_extension)

  resolved = media_url.to_s
  candidates << resolved if resolved.present? && !resolved.downcase.match?(video_extension)

  candidates.compact.map { |v| v.to_s.strip }.find(&:present?)
end
-
-
# Downloads a candidate preview image over HTTP(S), following up to
# +redirects_left+ redirects. Returns { bytes:, content_type:, filename: }
# or nil on any failure — size cap exceeded, HTML error page, non-image
# content type, bad magic bytes, or any raised error (best-effort).
def download_preview_image(url, redirects_left: 3)
  uri = URI.parse(url)
  return nil unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 8
  http.read_timeout = 20

  req = Net::HTTP::Get.new(uri.request_uri)
  req["Accept"] = "image/*,*/*;q=0.8"
  req["User-Agent"] = "Mozilla/5.0"
  req["Referer"] = Instagram::Client::INSTAGRAM_BASE_URL
  res = http.request(req)

  if res.is_a?(Net::HTTPRedirection) && res["location"].present?
    return nil if redirects_left.to_i <= 0
    next_url = normalize_redirect_url(base_uri: uri, location: res["location"])
    return nil if next_url.blank?

    return download_preview_image(next_url, redirects_left: redirects_left.to_i - 1)
  end

  return nil unless res.is_a?(Net::HTTPSuccess)

  body = res.body.to_s.b
  return nil if body.bytesize <= 0 || body.bytesize > MAX_PREVIEW_IMAGE_BYTES
  # Guard against login/error pages served with a 200.
  return nil if html_payload?(body)

  content_type = res["content-type"].to_s.split(";").first.to_s
  return nil unless content_type.start_with?("image/")
  # Raises on bad magic bytes; converted to nil by the rescue below.
  validate_known_signature!(body: body, content_type: content_type)

  ext = extension_for_content_type(content_type)
  {
    bytes: body,
    content_type: content_type,
    filename: "profile_post_preview_#{Digest::SHA256.hexdigest(url)[0, 12]}.#{ext}"
  }
rescue StandardError
  nil
end
-
-
# Uploads the raw image bytes as a new blob and attaches it as the post's
# preview image (content type defaults to JPEG when unknown).
def attach_preview_image_bytes!(post:, image_bytes:, content_type:, filename:)
  resolved_type = content_type.to_s.presence || "image/jpeg"
  preview_blob = ActiveStorage::Blob.create_and_upload!(
    io: StringIO.new(image_bytes),
    filename: filename,
    content_type: resolved_type,
    identify: false
  )
  attach_preview_blob_to_post!(post: post, blob: preview_blob)
end
-
-
# Points the post's preview_image at +blob+. When an attachment row already
# exists, the blob is swapped in place instead of creating a new attachment.
def attach_preview_blob_to_post!(post:, blob:)
  return unless blob

  existing = post.preview_image.attachment if post.preview_image.attached?
  if existing
    existing.update!(blob: blob) unless existing.blob_id == blob.id
  else
    post.preview_image.attach(blob)
  end
end
-
-
# Best-effort metadata stamp recording how the preview image was obtained;
# persistence errors are swallowed so the preview flow never fails here.
def stamp_preview_metadata!(post:, source:)
  stamped = merged_metadata(post: post).merge(
    "preview_image_status" => "attached",
    "preview_image_source" => source.to_s,
    "preview_image_attached_at" => Time.current.utc.iso8601(3)
  )
  post.update!(metadata: stamped)
rescue StandardError
  nil
end
-
-
# Enqueues GenerateProfilePostPreviewImageJob for a video post that still
# lacks a preview, using Rails.cache.fetch as a TTL-based dedupe so repeated
# failures don't flood the queue. Best-effort: enqueue errors are logged,
# never raised.
def enqueue_background_preview_generation!(post:, reason:)
  return if post.preview_image.attached?
  return unless post.media.attached?
  return unless post.media.blob&.content_type.to_s.start_with?("video/")

  # cache.fetch only runs the block (enqueues) when the key is absent, so at
  # most one job per post per TTL window is scheduled.
  cache_key = "profile_post:preview_enqueue:#{post.id}"
  Rails.cache.fetch(cache_key, expires_in: PROFILE_POST_PREVIEW_ENQUEUE_TTL_SECONDS) do
    GenerateProfilePostPreviewImageJob.perform_later(instagram_profile_post_id: post.id)
    true
  end
rescue StandardError => e
  Rails.logger.warn(
    "[DownloadInstagramProfilePostMediaJob] preview enqueue failed post_id=#{post.id} " \
    "reason=#{reason}: #{e.class}: #{e.message}"
  )
  nil
end
-
-
# Downloads the post's media over HTTP(S), following up to +redirects_left+
# redirects. Raises a descriptive RuntimeError on any failure (bad URL,
# redirect loop, HTTP error, empty/oversized payload, HTML error page, or
# bad magic bytes).
#
# NOTE(review): the whole response is buffered via res.body before the size
# limit is checked, so an oversized payload is fully read into memory
# before being rejected — consider Net::HTTP#read_body streaming if this
# becomes a problem.
#
# @return [Array(StringIO, String, String)] io, content type, filename
def download_media(url, redirects_left: 4)
  uri = URI.parse(url)
  raise "invalid media URL" unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 10
  http.read_timeout = 30

  req = Net::HTTP::Get.new(uri.request_uri)
  req["Accept"] = "*/*"
  req["User-Agent"] = "Mozilla/5.0"
  req["Referer"] = Instagram::Client::INSTAGRAM_BASE_URL
  res = http.request(req)

  if res.is_a?(Net::HTTPRedirection) && res["location"].present?
    raise "too many redirects" if redirects_left.to_i <= 0

    next_url = normalize_redirect_url(base_uri: uri, location: res["location"])
    raise "invalid redirect URL" if next_url.blank?

    return download_media(next_url, redirects_left: redirects_left.to_i - 1)
  end

  raise "media download failed: HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess)

  body = res.body.to_s
  content_type = res["content-type"].to_s.split(";").first.presence || "application/octet-stream"
  # Videos are allowed a larger budget than images.
  limit = content_type.start_with?("video/") ? MAX_VIDEO_BYTES : MAX_IMAGE_BYTES
  raise "empty media payload" if body.bytesize <= 0
  raise "media too large" if body.bytesize > limit
  raise "unexpected html payload" if html_payload?(body)
  validate_known_signature!(body: body, content_type: content_type)

  ext = extension_for_content_type(content_type)
  io = StringIO.new(body)
  io.set_encoding(Encoding::BINARY) if io.respond_to?(:set_encoding)
  [io, content_type, "profile_post_#{Digest::SHA256.hexdigest(url)[0, 12]}.#{ext}"]
end
-
-
# Resolves a redirect Location header (absolute or relative) against the
# request URI. Returns the absolute URL string, or nil when the result is
# not HTTP(S) or cannot be parsed.
def normalize_redirect_url(base_uri:, location:)
  resolved = URI.parse(URI.join(base_uri.to_s, location.to_s).to_s)
  return resolved.to_s if resolved.is_a?(URI::HTTP) || resolved.is_a?(URI::HTTPS)

  nil
rescue URI::InvalidURIError, ArgumentError
  nil
end
-
-
# Maps a MIME content type onto a file extension, falling back to "bin"
# for anything unrecognized.
def extension_for_content_type(content_type)
  mapping = {
    "jpeg" => "jpg",
    "png" => "png",
    "webp" => "webp",
    "gif" => "gif",
    "mp4" => "mp4",
    "quicktime" => "mov"
  }
  matched = mapping.find { |needle, _ext| content_type.include?(needle) }
  matched ? matched.last : "bin"
end
-
-
# Checks that a blob is plausibly intact: present, non-zero recorded size,
# and — when the storage service exposes a local path (disk service) — that
# the backing file exists and matches the recorded byte size. Returns
# { valid:, reason: }; any unexpected error is reported as invalid rather
# than raised.
def blob_integrity_for(blob)
  return { valid: false, reason: "missing_blob" } unless blob
  return { valid: false, reason: "non_positive_byte_size" } if blob.byte_size.to_i <= 0

  service = blob.service
  # `true` includes private methods — Disk service's path_for is private.
  if service.respond_to?(:path_for, true)
    path = service.send(:path_for, blob.key)
    return { valid: false, reason: "missing_file_on_disk" } unless path && File.exist?(path)

    file_size = File.size(path)
    return { valid: false, reason: "zero_byte_file" } if file_size <= 0
    return { valid: false, reason: "byte_size_mismatch" } if blob.byte_size.to_i.positive? && file_size != blob.byte_size.to_i
  end

  { valid: true, reason: nil }
rescue StandardError => e
  { valid: false, reason: "integrity_check_error: #{e.class}" }
end
-
-
# Points the post's media at +blob+, reusing an existing attachment row by
# swapping the blob in place when one is present. Raises when blob is nil.
def attach_blob_to_post!(post:, blob:)
  raise "missing blob for attach" unless blob

  current = post.media.attachment if post.media.attached?
  if current
    current.update!(blob: blob) unless current.blob_id == blob.id
  else
    post.media.attach(blob)
  end
end
-
-
# Heuristic: does the payload look like an HTML document (e.g. a login or
# error page served instead of media)? Only the first 4 KiB is inspected.
def html_payload?(body)
  head = body.to_s.byteslice(0, 4096).to_s.downcase
  head.start_with?("<!doctype html") || head.include?("<html")
end
-
-
# Validates that the payload's leading bytes ("magic number") match the
# declared content type; raises a descriptive error on mismatch. Blank and
# octet-stream types are skipped (no reliable signature to check).
#
# BUG FIX: video/webm uses an EBML/Matroska container whose magic is
# 1A 45 DF A3, not an ISO-BMFF "ftyp" box, so webm payloads (which the
# preview-URL heuristics elsewhere in this job treat as videos) previously
# always failed the generic video check. A dedicated webm branch is added
# before the ftyp fallback.
#
# @param body [String] raw payload bytes
# @param content_type [String] declared MIME type
# @raise [RuntimeError] when the signature does not match the type
def validate_known_signature!(body:, content_type:)
  type = content_type.to_s.downcase
  # strip.empty? is the stdlib equivalent of ActiveSupport's blank? here.
  return if type.strip.empty?
  return if type.include?("octet-stream")

  case
  when type.include?("jpeg")
    raise "invalid jpeg signature" unless body.start_with?("\xFF\xD8".b)
  when type.include?("png")
    raise "invalid png signature" unless body.start_with?("\x89PNG\r\n\x1A\n".b)
  when type.include?("gif")
    raise "invalid gif signature" unless body.start_with?("GIF87a".b) || body.start_with?("GIF89a".b)
  when type.include?("webp")
    raise "invalid webp signature" unless body.bytesize >= 12 && body.byteslice(0, 4) == "RIFF" && body.byteslice(8, 4) == "WEBP"
  when type.include?("webm") || type.include?("matroska")
    raise "invalid webm signature" unless body.start_with?("\x1A\x45\xDF\xA3".b)
  when type.start_with?("video/")
    raise "invalid video signature" unless body.bytesize >= 12 && body.byteslice(4, 4) == "ftyp"
  end
end
-
end
-
# Bulk-downloads avatars for an account's profiles that have a profile pic
# URL but no stored avatar attachment yet, then notifies the UI via Turbo.
class DownloadMissingAvatarsJob < ApplicationJob
  queue_as :avatars

  # @param instagram_account_id [Integer] account to scan
  # @param limit [Integer] max profiles per run, clamped to 1..2000
  # @raise [ActiveRecord::RecordNotFound] when the account id is stale
  def perform(instagram_account_id:, limit: 250)
    account = InstagramAccount.find(instagram_account_id)

    limit = limit.to_i.clamp(1, 2_000)
    # Profiles with a source avatar URL but no avatar attachment yet.
    profiles = account.instagram_profiles
      .where.not(profile_pic_url: [nil, ""])
      .left_joins(:avatar_attachment)
      .where(active_storage_attachments: { id: nil })
      .limit(limit)

    downloaded = 0
    failed = 0

    profiles.each do |profile|
      DownloadInstagramProfileAvatarJob.perform_now(instagram_account_id: account.id, instagram_profile_id: profile.id, broadcast: false)
      downloaded += 1
    rescue StandardError
      # Per-profile failures are counted but do not stop the batch.
      failed += 1
    end

    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "notice", message: "Avatar sync complete: downloaded #{downloaded}, failed #{failed}." }
    )
  rescue StandardError => e
    # BUG FIX: `account` is nil when the initial find raised; broadcasting
    # to a nil stream raised a secondary error that masked the original.
    if defined?(account) && account
      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: "alert", message: "Avatar sync failed: #{e.message}" }
      )
    end
    raise
  end
end
-
# Fans DownloadMissingAvatarsJob out across all accounts in cursor-paged
# batches, scheduling a continuation job while more accounts remain.
class EnqueueAvatarSyncForAllAccountsJob < ApplicationJob
  include ScheduledAccountBatching

  queue_as :avatars

  DEFAULT_ACCOUNT_BATCH_SIZE = ENV.fetch("AVATAR_SYNC_ACCOUNT_BATCH_SIZE", "30").to_i.clamp(5, 160)
  CONTINUATION_WAIT_SECONDS = ENV.fetch("AVATAR_SYNC_CONTINUATION_WAIT_SECONDS", "2").to_i.clamp(1, 90)

  # Accepts either a legacy options hash or keyword args (normalized by
  # ScheduledAccountBatching); returns a summary hash for observability.
  def perform(opts = nil, **kwargs)
    options = normalize_scheduler_params(
      opts,
      kwargs,
      limit: 500,
      batch_size: DEFAULT_ACCOUNT_BATCH_SIZE,
      cursor_id: nil
    )
    per_account_limit = options[:limit].to_i.clamp(1, 2000)
    batch = load_account_batch(
      scope: InstagramAccount.all,
      cursor_id: options[:cursor_id],
      batch_size: options[:batch_size]
    )

    enqueued_count = 0
    batch[:accounts].each do |account|
      # Accounts without session cookies cannot download anything.
      next if account.cookies.blank?

      DownloadMissingAvatarsJob.perform_later(instagram_account_id: account.id, limit: per_account_limit)
      enqueued_count += 1
    rescue StandardError
      next
    end

    continuation =
      if batch[:has_more]
        schedule_account_batch_continuation!(
          wait_seconds: CONTINUATION_WAIT_SECONDS,
          payload: {
            limit: per_account_limit,
            batch_size: batch[:batch_size],
            cursor_id: batch[:next_cursor_id]
          }
        )
      end

    {
      accounts_enqueued: enqueued_count,
      scanned_accounts: batch[:accounts].length,
      continuation_job_id: continuation&.job_id
    }
  end
end
-
# Fans ProcessInstagramAccountContinuouslyJob out across accounts that have
# continuous processing enabled, in cursor-paged batches bounded by an
# overall budget (`limit`/`remaining`) that is threaded through continuation
# jobs so a whole run never exceeds the cap.
class EnqueueContinuousAccountProcessingJob < ApplicationJob
  include ScheduledAccountBatching

  queue_as :sync

  DEFAULT_ACCOUNT_BATCH_SIZE = ENV.fetch("CONTINUOUS_PROCESSING_ENQUEUE_BATCH_SIZE", "25").to_i.clamp(5, 120)
  CONTINUATION_WAIT_SECONDS = ENV.fetch("CONTINUOUS_PROCESSING_ENQUEUE_CONTINUATION_WAIT_SECONDS", "2").to_i.clamp(1, 60)

  # @param opts [Hash, nil] legacy positional options hash
  # @param kwargs [Hash] keyword form; both normalized to one params hash
  # @return [Hash] summary (enqueued count, remaining budget, continuation)
  def perform(opts = nil, **kwargs)
    params = normalize_scheduler_params(
      opts,
      kwargs,
      limit: 100,
      batch_size: DEFAULT_ACCOUNT_BATCH_SIZE,
      cursor_id: nil,
      remaining: nil
    )
    cap = params[:limit].to_i.clamp(1, 500)
    # First run uses the full cap; continuations carry the leftover budget.
    remaining = params[:remaining].present? ? params[:remaining].to_i : cap
    remaining = remaining.clamp(0, cap)
    return { enqueued: 0, limit: cap, remaining: 0 } if remaining <= 0

    batch = load_account_batch(
      scope: InstagramAccount.where(continuous_processing_enabled: true),
      cursor_id: params[:cursor_id],
      batch_size: [ params[:batch_size].to_i.clamp(1, 120), remaining ].min
    )

    enqueued = 0
    now = Time.current
    batch[:accounts].each do |account|
      next if account.cookies.blank?
      # Honor per-account backoff set by previous failures.
      next if account.continuous_processing_retry_after_at.present? && account.continuous_processing_retry_after_at > now

      ProcessInstagramAccountContinuouslyJob.perform_later(
        instagram_account_id: account.id,
        trigger_source: "scheduler"
      )
      enqueued += 1
    rescue StandardError => e
      Ops::StructuredLogger.warn(
        event: "continuous_processing.enqueue_failed",
        payload: {
          account_id: account.id,
          error_class: e.class.name,
          error_message: e.message
        }
      )
    end

    # Budget is consumed by accounts scanned, not only those enqueued.
    scanned = batch[:accounts].length
    remaining_after_batch = [ remaining - scanned, 0 ].max
    continuation_job = nil
    if batch[:has_more] && remaining_after_batch.positive?
      continuation_job = schedule_account_batch_continuation!(
        wait_seconds: CONTINUATION_WAIT_SECONDS,
        payload: {
          limit: cap,
          batch_size: batch[:batch_size],
          cursor_id: batch[:next_cursor_id],
          remaining: remaining_after_batch
        }
      )
    end

    Ops::StructuredLogger.info(
      event: "continuous_processing.batch_enqueued",
      payload: {
        limit: cap,
        batch_size: batch[:batch_size],
        scanned_accounts: scanned,
        enqueued_count: enqueued,
        remaining_after_batch: remaining_after_batch,
        continuation_enqueued: continuation_job.present?,
        continuation_job_id: continuation_job&.job_id
      }
    )

    {
      enqueued: enqueued,
      limit: cap,
      batch_size: batch[:batch_size],
      scanned_accounts: scanned,
      remaining_after_batch: remaining_after_batch,
      continuation_job_id: continuation_job&.job_id
    }
  end
end
-
# Fans AutoEngageHomeFeedJob out across all accounts in cursor-paged
# batches, forwarding the engagement knobs (post count, story inclusion,
# story hold time) unchanged into each continuation.
class EnqueueFeedAutoEngagementForAllAccountsJob < ApplicationJob
  include ScheduledAccountBatching

  queue_as :sync

  DEFAULT_ACCOUNT_BATCH_SIZE = ENV.fetch("FEED_AUTO_ENGAGEMENT_ACCOUNT_BATCH_SIZE", "25").to_i.clamp(5, 120)
  CONTINUATION_WAIT_SECONDS = ENV.fetch("FEED_AUTO_ENGAGEMENT_CONTINUATION_WAIT_SECONDS", "3").to_i.clamp(1, 90)

  # @param opts [Hash, nil] legacy positional options hash
  # @param kwargs [Hash] keyword form; both normalized to one params hash
  # @return [Hash] summary (accounts enqueued/scanned, continuation job id)
  def perform(opts = nil, **kwargs)
    params = normalize_scheduler_params(
      opts,
      kwargs,
      max_posts: 3,
      include_story: true,
      story_hold_seconds: 18,
      batch_size: DEFAULT_ACCOUNT_BATCH_SIZE,
      cursor_id: nil
    )
    # Clamp all engagement knobs to sane ranges before fan-out.
    max_posts_i = params[:max_posts].to_i.clamp(1, 10)
    include_story_bool = ActiveModel::Type::Boolean.new.cast(params[:include_story])
    hold_seconds_i = params[:story_hold_seconds].to_i.clamp(8, 40)
    batch = load_account_batch(
      scope: InstagramAccount.all,
      cursor_id: params[:cursor_id],
      batch_size: params[:batch_size]
    )

    enqueued = 0

    batch[:accounts].each do |account|
      next if account.cookies.blank?

      AutoEngageHomeFeedJob.perform_later(
        instagram_account_id: account.id,
        max_posts: max_posts_i,
        include_story: include_story_bool,
        story_hold_seconds: hold_seconds_i
      )
      enqueued += 1
    rescue StandardError => e
      # Per-account enqueue failures are logged and skipped.
      Ops::StructuredLogger.warn(
        event: "feed_auto_engagement.enqueue_failed",
        payload: {
          account_id: account.id,
          error_class: e.class.name,
          error_message: e.message
        }
      )
      next
    end

    continuation_job = nil
    if batch[:has_more]
      continuation_job = schedule_account_batch_continuation!(
        wait_seconds: CONTINUATION_WAIT_SECONDS,
        payload: {
          max_posts: max_posts_i,
          include_story: include_story_bool,
          story_hold_seconds: hold_seconds_i,
          batch_size: batch[:batch_size],
          cursor_id: batch[:next_cursor_id]
        }
      )
    end

    Ops::StructuredLogger.info(
      event: "feed_auto_engagement.batch_enqueued",
      payload: {
        enqueued_accounts: enqueued,
        max_posts: max_posts_i,
        include_story: include_story_bool,
        story_hold_seconds: hold_seconds_i,
        batch_size: batch[:batch_size],
        scanned_accounts: batch[:accounts].length,
        continuation_enqueued: continuation_job.present?,
        continuation_job_id: continuation_job&.job_id
      }
    )

    {
      enqueued_accounts: enqueued,
      scanned_accounts: batch[:accounts].length,
      continuation_job_id: continuation_job&.job_id
    }
  end
end
-
# Creates a queued follow-graph sync run per account (with credentials and a
# username) and enqueues SyncFollowGraphJob for it, in cursor-paged batches.
class EnqueueFollowGraphSyncForAllAccountsJob < ApplicationJob
  include ScheduledAccountBatching

  queue_as :sync

  DEFAULT_ACCOUNT_BATCH_SIZE = ENV.fetch("FOLLOW_GRAPH_SYNC_ACCOUNT_BATCH_SIZE", "20").to_i.clamp(5, 120)
  CONTINUATION_WAIT_SECONDS = ENV.fetch("FOLLOW_GRAPH_SYNC_CONTINUATION_WAIT_SECONDS", "3").to_i.clamp(1, 90)

  def perform(opts = nil, **kwargs)
    options = normalize_scheduler_params(opts, kwargs, batch_size: DEFAULT_ACCOUNT_BATCH_SIZE, cursor_id: nil)
    batch = load_account_batch(
      scope: InstagramAccount.where.not(username: [ nil, "" ]),
      cursor_id: options[:cursor_id],
      batch_size: options[:batch_size]
    )

    enqueued_count = 0
    batch[:accounts].each do |account|
      next if account.cookies.blank?

      sync_run = account.sync_runs.create!(kind: "follow_graph", status: "queued")
      SyncFollowGraphJob.perform_later(instagram_account_id: account.id, sync_run_id: sync_run.id)
      enqueued_count += 1
    rescue StandardError
      # best-effort; errors will be recorded by ApplicationJob failure logging
      next
    end

    continuation =
      if batch[:has_more]
        schedule_account_batch_continuation!(
          wait_seconds: CONTINUATION_WAIT_SECONDS,
          payload: {
            batch_size: batch[:batch_size],
            cursor_id: batch[:next_cursor_id]
          }
        )
      end

    {
      accounts_enqueued: enqueued_count,
      scanned_accounts: batch[:accounts].length,
      continuation_job_id: continuation&.job_id
    }
  end
end
-
# Fans SyncNextProfilesForAccountJob out across all accounts in cursor-paged
# batches, forwarding the per-account profile limit into each continuation.
class EnqueueProfileRefreshForAllAccountsJob < ApplicationJob
  include ScheduledAccountBatching

  queue_as :profiles

  DEFAULT_ACCOUNT_BATCH_SIZE = ENV.fetch("PROFILE_REFRESH_ACCOUNT_BATCH_SIZE", "20").to_i.clamp(5, 120)
  CONTINUATION_WAIT_SECONDS = ENV.fetch("PROFILE_REFRESH_CONTINUATION_WAIT_SECONDS", "3").to_i.clamp(1, 90)

  def perform(opts = nil, **kwargs)
    options = normalize_scheduler_params(
      opts,
      kwargs,
      limit_per_account: 30,
      batch_size: DEFAULT_ACCOUNT_BATCH_SIZE,
      cursor_id: nil
    )
    per_account_limit = options[:limit_per_account].to_i.clamp(1, 500)
    batch = load_account_batch(
      scope: InstagramAccount.all,
      cursor_id: options[:cursor_id],
      batch_size: options[:batch_size]
    )

    enqueued_count = 0
    batch[:accounts].each do |account|
      next if account.cookies.blank?

      SyncNextProfilesForAccountJob.perform_later(instagram_account_id: account.id, limit: per_account_limit)
      enqueued_count += 1
    rescue StandardError
      next
    end

    continuation =
      if batch[:has_more]
        schedule_account_batch_continuation!(
          wait_seconds: CONTINUATION_WAIT_SECONDS,
          payload: {
            limit_per_account: per_account_limit,
            batch_size: batch[:batch_size],
            cursor_id: batch[:next_cursor_id]
          }
        )
      end

    {
      accounts_enqueued: enqueued_count,
      scanned_accounts: batch[:accounts].length,
      continuation_job_id: continuation&.job_id
    }
  end
end
-
require "set"
-
-
# Selects due profiles for one account and enqueues per-profile recent-post
# scan jobs, honoring a scan cooldown, dedupe against already queued/running
# scans, a priority ordering (high/medium/low), and a rotating cursor that is
# persisted on the account so successive runs cover all candidate profiles.
class EnqueueRecentProfilePostScansForAccountJob < ApplicationJob
  queue_as :post_downloads

  VISITED_TAG = SyncRecentProfilePostsForProfileJob::VISITED_TAG
  ANALYZED_TAG = SyncRecentProfilePostsForProfileJob::ANALYZED_TAG
  # Scan order: :high first, then :medium, then :low.
  PRIORITY_LEVELS = %i[high medium low].freeze
  # Minimum gap between scans of the same profile (seconds, env-tunable).
  PROFILE_SCAN_COOLDOWN_SECONDS = ENV.fetch("PROFILE_SCAN_COOLDOWN_SECONDS", "1800").to_i.clamp(60, 12.hours.to_i)
  # Age after which an otherwise-quiet profile is considered due for a refresh.
  PROFILE_SCAN_REFRESH_INTERVAL_SECONDS = ENV.fetch("PROFILE_SCAN_REFRESH_INTERVAL_SECONDS", "4500").to_i.clamp(300, 12.hours.to_i)
  # How far back to look for queued/running scans when deduping.
  PROFILE_SCAN_ACTIVE_LOOKBACK_SECONDS = ENV.fetch("PROFILE_SCAN_ACTIVE_LOOKBACK_SECONDS", "7200").to_i.clamp(300, 24.hours.to_i)
  # Inspect up to limit * multiplier candidates so skips still leave enough to enqueue.
  PROFILE_SCAN_INSPECTION_MULTIPLIER = ENV.fetch("PROFILE_SCAN_INSPECTION_MULTIPLIER", "8").to_i.clamp(2, 20)
  PROFILE_SCAN_MAX_INSPECTION = ENV.fetch("PROFILE_SCAN_MAX_INSPECTION", "320").to_i.clamp(30, 2000)

  # Enqueues up to +limit_per_account+ SyncRecentProfilePostsForProfileJob runs
  # for the account's eligible profiles, then persists the rotation cursor and
  # emits a structured summary log. Raises (after logging) on batch-level errors.
  def perform(instagram_account_id:, limit_per_account: 8, posts_limit: 3, comments_limit: 8)
    account = InstagramAccount.find(instagram_account_id)
    # No cookies means no Instagram session; nothing to do.
    return if account.cookies.blank?
    now = Time.current

    cap = limit_per_account.to_i.clamp(1, 30)
    posts_limit_i = posts_limit.to_i.clamp(1, 3)
    comments_limit_i = comments_limit.to_i.clamp(1, 20)

    selection = pick_profiles_for_scan(account: account, limit: cap, now: now)
    active_scans = active_profile_scan_profile_ids(
      account: account,
      profile_ids: selection[:candidate_profile_ids],
      now: now
    )
    enqueued = 0
    considered_profile_id = nil
    skipped = []

    selection[:ordered_candidates].each do |candidate|
      break if enqueued >= cap

      profile = candidate[:profile]
      priority = candidate[:priority].to_s
      considered_profile_id = profile.id
      skip_reason = skip_reason_for_profile_scan(profile: profile, active_scans: active_scans, now: now)
      if skip_reason.present?
        skipped << { profile_id: profile.id, priority: priority, reason: skip_reason }
        next
      end

      SyncRecentProfilePostsForProfileJob.perform_later(
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        posts_limit: posts_limit_i,
        comments_limit: comments_limit_i
      )
      enqueued += 1
    rescue StandardError => e
      skipped << { profile_id: profile&.id, priority: priority, reason: "enqueue_failed", error_class: e.class.name }
      Ops::StructuredLogger.warn(
        event: "profile_scan.enqueue_failed",
        payload: {
          account_id: account.id,
          # Safe navigation: profile may not be assigned yet when the error was
          # raised (matches the skipped-row above; a bare profile.id would raise
          # NoMethodError inside this rescue and abort the whole batch).
          profile_id: profile&.id,
          error_class: e.class.name,
          error_message: e.message
        }
      )
    end

    persist_scheduler_cursor!(
      account: account,
      cursor_id: considered_profile_id || selection[:cursor_end_id],
      now: now
    )

    Ops::StructuredLogger.info(
      event: "profile_scan.account_batch_enqueued",
      payload: {
        account_id: account.id,
        candidate_profiles: selection[:candidate_profile_ids].length,
        selected_profiles: selection[:ordered_candidates].length,
        enqueued_jobs: enqueued,
        skipped_profiles: skipped.length,
        skipped_reasons: skipped.group_by { |row| row[:reason] }.transform_values(&:length),
        cursor_start_id: selection[:cursor_start_id],
        cursor_end_id: considered_profile_id || selection[:cursor_end_id],
        priority_counts: selection[:priority_counts],
        limit_per_account: cap,
        posts_limit: posts_limit_i,
        comments_limit: comments_limit_i,
        profile_scan_cooldown_seconds: PROFILE_SCAN_COOLDOWN_SECONDS,
        scan_refresh_interval_seconds: PROFILE_SCAN_REFRESH_INTERVAL_SECONDS
      }
    )
  rescue StandardError => e
    Ops::StructuredLogger.error(
      event: "profile_scan.account_batch_failed",
      payload: {
        account_id: instagram_account_id,
        error_class: e.class.name,
        error_message: e.message
      }
    )
    raise
  end

  private

  # Builds the ordered candidate list for this run: rotates the followed/follower
  # profile ids past the persisted cursor, inspects a bounded window, drops
  # policy-excluded profiles, and orders the rest by scan priority.
  def pick_profiles_for_scan(account:, limit:, now:)
    candidate_ids = account.instagram_profiles
      .where("following = ? OR follows_you = ?", true, true)
      .order(:id)
      .pluck(:id)

    if candidate_ids.empty?
      return {
        candidate_profile_ids: [],
        ordered_candidates: [],
        cursor_start_id: account.continuous_processing_profile_scan_cursor_id,
        cursor_end_id: account.continuous_processing_profile_scan_cursor_id,
        priority_counts: {}
      }
    end

    cursor_start_id = account.continuous_processing_profile_scan_cursor_id
    rotated_ids = rotate_ids(ids: candidate_ids, cursor_id: cursor_start_id)
    # Inspect more than +limit+ so cooldown/dedupe skips still leave enough
    # eligible profiles, bounded by the global max and the candidate count.
    inspection_count = [ [ limit * PROFILE_SCAN_INSPECTION_MULTIPLIER, limit ].max, PROFILE_SCAN_MAX_INSPECTION, rotated_ids.length ].min
    inspection_ids = rotated_ids.first(inspection_count)
    profile_by_id = account.instagram_profiles
      .where(id: inspection_ids)
      .includes(:profile_tags)
      .to_a
      .index_by(&:id)
    # filter_map keeps rotation order and drops ids deleted since the pluck.
    inspected_profiles = inspection_ids.filter_map { |id| profile_by_id[id] }
    eligible_profiles = inspected_profiles.reject { |profile| Instagram::ProfileScanPolicy.skip_from_cached_profile?(profile: profile) }
    weighted = eligible_profiles.map { |profile| { profile: profile, priority: scan_priority_for(profile: profile, now: now) } }
    # Stable bucket sort: all :high first, then :medium, then :low.
    ordered_candidates = PRIORITY_LEVELS.flat_map do |priority|
      weighted.select { |row| row[:priority] == priority }
    end

    {
      candidate_profile_ids: eligible_profiles.map(&:id),
      ordered_candidates: ordered_candidates,
      cursor_start_id: cursor_start_id,
      cursor_end_id: inspection_ids.last,
      priority_counts: weighted.group_by { |row| row[:priority] }.transform_values(&:size)
    }
  end

  # Returns a skip-reason string for a candidate, or nil when it should be
  # scanned now. A profile inside the cooldown window is still scanned if it
  # has shown activity (post/story) newer than its last scan.
  def skip_reason_for_profile_scan(profile:, active_scans:, now:)
    return "already_queued_or_running" if active_scans.include?(profile.id)

    last_scan_at = profile.ai_last_analyzed_at || profile.last_synced_at
    return nil if last_scan_at.blank?
    return nil if last_scan_at <= (now - PROFILE_SCAN_COOLDOWN_SECONDS.seconds)

    last_activity_at = [ profile.last_post_at, profile.last_story_seen_at ].compact.max
    return nil if last_activity_at.present? && last_activity_at > last_scan_at

    "cooldown_active"
  end

  # Priority buckets: :high for never-scanned / untagged / newly-active
  # profiles, :medium when the refresh interval has elapsed, :low otherwise.
  def scan_priority_for(profile:, now:)
    last_scan_at = profile.ai_last_analyzed_at || profile.last_synced_at
    last_activity_at = [ profile.last_post_at, profile.last_story_seen_at ].compact.max
    tag_names = profile.profile_tags.map { |tag| tag.name.to_s }
    unseen = !tag_names.include?(VISITED_TAG) || !tag_names.include?(ANALYZED_TAG)

    return :high if last_scan_at.blank?
    return :high if unseen
    return :high if last_activity_at.present? && last_activity_at > last_scan_at
    return :medium if last_scan_at <= (now - PROFILE_SCAN_REFRESH_INTERVAL_SECONDS.seconds)

    :low
  end

  # Set of profile ids with an analyze_profile action queued/running within the
  # lookback window, used to avoid double-enqueueing scans.
  def active_profile_scan_profile_ids(account:, profile_ids:, now:)
    return Set.new if profile_ids.empty?

    lookback = now - PROFILE_SCAN_ACTIVE_LOOKBACK_SECONDS.seconds
    ids = account.instagram_profile_action_logs
      .where(action: "analyze_profile", status: %w[queued running], instagram_profile_id: profile_ids)
      .where("occurred_at >= ?", lookback)
      .distinct
      .pluck(:instagram_profile_id)

    ids.to_set
  end

  # Rotates +ids+ so iteration resumes just after +cursor_id+; returns the
  # list unchanged when the cursor is blank or no longer present.
  def rotate_ids(ids:, cursor_id:)
    return ids if ids.empty? || cursor_id.blank?

    index = ids.index(cursor_id.to_i)
    return ids unless index

    ids.drop(index + 1) + ids.take(index + 1)
  end

  # Best-effort persistence of the rotation cursor and last-enqueued timestamp.
  # update_columns skips validations/callbacks on purpose; failures are ignored
  # so cursor bookkeeping never fails the batch.
  def persist_scheduler_cursor!(account:, cursor_id:, now:)
    updates = {
      continuous_processing_last_profile_scan_enqueued_at: now,
      updated_at: Time.current
    }
    updates[:continuous_processing_profile_scan_cursor_id] = cursor_id.to_i if cursor_id.present?
    account.update_columns(updates)
  rescue StandardError
    nil
  end
end
-
# Fans out EnqueueRecentProfilePostScansForAccountJob across one cursor-bounded
# batch of accounts, scheduling a continuation of itself when further accounts
# remain beyond the batch cursor.
class EnqueueRecentProfilePostScansForAllAccountsJob < ApplicationJob
  include ScheduledAccountBatching

  queue_as :post_downloads

  DEFAULT_ACCOUNT_BATCH_SIZE = ENV.fetch("PROFILE_SCAN_ACCOUNT_BATCH_SIZE", "25").to_i.clamp(5, 120)
  CONTINUATION_WAIT_SECONDS = ENV.fetch("PROFILE_SCAN_CONTINUATION_WAIT_SECONDS", "3").to_i.clamp(1, 90)

  # Accept a single hash (e.g. from Sidekiq cron/schedule) or keyword args from perform_later(...)
  def perform(opts = nil, **kwargs)
    params = normalize_scheduler_params(
      opts,
      kwargs,
      limit_per_account: 8,
      posts_limit: 3,
      comments_limit: 8,
      batch_size: DEFAULT_ACCOUNT_BATCH_SIZE,
      cursor_id: nil
    )

    per_account_cap = params[:limit_per_account].to_i.clamp(1, 30)
    posts_cap = params[:posts_limit].to_i.clamp(1, 3)
    comments_cap = params[:comments_limit].to_i.clamp(1, 20)
    account_batch = load_account_batch(
      scope: InstagramAccount.all,
      cursor_id: params[:cursor_id],
      batch_size: params[:batch_size]
    )

    fanout_count = 0
    account_batch[:accounts].each do |account|
      # Accounts without a stored session cannot be scanned.
      next if account.cookies.blank?

      EnqueueRecentProfilePostScansForAccountJob.perform_later(
        instagram_account_id: account.id,
        limit_per_account: per_account_cap,
        posts_limit: posts_cap,
        comments_limit: comments_cap
      )
      fanout_count += 1
    rescue StandardError => e
      # Per-account failures are logged and skipped; the batch keeps going.
      Ops::StructuredLogger.warn(
        event: "profile_scan.all_accounts_enqueue_failed",
        payload: {
          account_id: account.id,
          error_class: e.class.name,
          error_message: e.message
        }
      )
      next
    end

    # Schedule the next slice only when accounts remain past the cursor;
    # `followup` stays nil otherwise.
    followup =
      if account_batch[:has_more]
        schedule_account_batch_continuation!(
          wait_seconds: CONTINUATION_WAIT_SECONDS,
          payload: {
            limit_per_account: per_account_cap,
            posts_limit: posts_cap,
            comments_limit: comments_cap,
            batch_size: account_batch[:batch_size],
            cursor_id: account_batch[:next_cursor_id]
          }
        )
      end

    Ops::StructuredLogger.info(
      event: "profile_scan.all_accounts_batch_enqueued",
      payload: {
        accounts_enqueued: fanout_count,
        scanned_accounts: account_batch[:accounts].length,
        limit_per_account: per_account_cap,
        posts_limit: posts_cap,
        comments_limit: comments_cap,
        batch_size: account_batch[:batch_size],
        continuation_enqueued: followup.present?,
        continuation_job_id: followup&.job_id
      }
    )

    # Return a compact summary for callers inspecting the job result.
    {
      accounts_enqueued: fanout_count,
      scanned_accounts: account_batch[:accounts].length,
      continuation_job_id: followup&.job_id
    }
  end
end
-
# Fans out SyncProfileStoriesForAccountJob (auto-reply story scanning) across
# one cursor-bounded batch of accounts, scheduling a continuation of itself
# when more accounts remain.
class EnqueueStoryAutoRepliesForAllAccountsJob < ApplicationJob
  include ScheduledAccountBatching

  queue_as :story_downloads

  # Accounts handled per invocation (env-tunable, clamped 5..120).
  DEFAULT_ACCOUNT_BATCH_SIZE = ENV.fetch("STORY_AUTO_REPLY_ACCOUNT_BATCH_SIZE", "20").to_i.clamp(5, 120)
  # Delay before the continuation job for the next batch (clamped 1..90 s).
  CONTINUATION_WAIT_SECONDS = ENV.fetch("STORY_AUTO_REPLY_CONTINUATION_WAIT_SECONDS", "3").to_i.clamp(1, 90)

  # Accepts a single options hash (cron-style) or keyword args; both forms are
  # normalized by ScheduledAccountBatching#normalize_scheduler_params.
  # Returns a summary hash of enqueued/scanned counts and the continuation id.
  def perform(opts = nil, **kwargs)
    params = normalize_scheduler_params(
      opts,
      kwargs,
      max_stories: 10,
      force_analyze_all: false,
      profile_limit: SyncProfileStoriesForAccountJob::STORY_BATCH_LIMIT,
      batch_size: DEFAULT_ACCOUNT_BATCH_SIZE,
      cursor_id: nil
    )
    max_stories_i = params[:max_stories].to_i.clamp(1, 10)
    # Boolean cast accepts "1"/"true"/true etc. from schedule payloads.
    force = ActiveModel::Type::Boolean.new.cast(params[:force_analyze_all])
    profile_limit = params[:profile_limit].to_i.clamp(1, SyncProfileStoriesForAccountJob::STORY_BATCH_LIMIT)
    batch = load_account_batch(
      scope: InstagramAccount.all,
      cursor_id: params[:cursor_id],
      batch_size: params[:batch_size]
    )

    enqueued = 0

    batch[:accounts].each do |account|
      # No stored cookies means no Instagram session for this account.
      next if account.cookies.blank?

      SyncProfileStoriesForAccountJob.perform_later(
        instagram_account_id: account.id,
        story_limit: profile_limit,
        stories_per_profile: max_stories_i,
        with_comments: true,
        require_auto_reply_tag: true,
        force_analyze_all: force
      )
      enqueued += 1
    rescue StandardError => e
      # Best-effort fan-out: log the failure and continue with other accounts.
      Ops::StructuredLogger.warn(
        event: "story_auto_reply.enqueue_failed",
        payload: {
          account_id: account.id,
          error_class: e.class.name,
          error_message: e.message
        }
      )
      next
    end

    continuation_job = nil
    if batch[:has_more]
      # Re-enqueue self with the advanced cursor to cover remaining accounts.
      continuation_job = schedule_account_batch_continuation!(
        wait_seconds: CONTINUATION_WAIT_SECONDS,
        payload: {
          max_stories: max_stories_i,
          force_analyze_all: force,
          profile_limit: profile_limit,
          batch_size: batch[:batch_size],
          cursor_id: batch[:next_cursor_id]
        }
      )
    end

    Ops::StructuredLogger.info(
      event: "story_auto_reply.batch_enqueued",
      payload: {
        enqueued_accounts: enqueued,
        scanned_accounts: batch[:accounts].length,
        max_stories: max_stories_i,
        force_analyze_all: force,
        profile_limit: profile_limit,
        batch_size: batch[:batch_size],
        continuation_enqueued: continuation_job.present?,
        continuation_job_id: continuation_job&.job_id
      }
    )

    # Summary hash (the job's return value; useful in tests/console runs).
    {
      enqueued_accounts: enqueued,
      scanned_accounts: batch[:accounts].length,
      continuation_job_id: continuation_job&.job_id
    }
  end
end
-
require "digest"
-
require "uri"
-
require "cgi"
-
-
# Fetches a profile's details (and DM messageability) from Instagram, persists
# them onto the InstagramProfile, records post/avatar events, kicks off an
# avatar re-download when the picture URL changed, and broadcasts a Turbo
# notification. Progress is tracked through an InstagramProfileActionLog.
class FetchInstagramProfileDetailsJob < ApplicationJob
  queue_as :profiles

  # @param instagram_account_id [Integer] owning account; skipped (logged) if missing
  # @param instagram_profile_id [Integer] profile to refresh; skipped (logged) if missing
  # @param profile_action_log_id [Integer, nil] existing action log to reuse, else one is created
  def perform(instagram_account_id:, instagram_profile_id:, profile_action_log_id: nil)
    account = InstagramAccount.find_by(id: instagram_account_id)
    unless account
      Ops::StructuredLogger.info(
        event: "profile_fetch_details.skipped_missing_account",
        payload: {
          instagram_account_id: instagram_account_id,
          instagram_profile_id: instagram_profile_id
        }
      )
      return
    end

    profile = account.instagram_profiles.find_by(id: instagram_profile_id)
    unless profile
      Ops::StructuredLogger.info(
        event: "profile_fetch_details.skipped_missing_profile",
        payload: {
          instagram_account_id: account.id,
          instagram_profile_id: instagram_profile_id
        }
      )
      return
    end

    action_log = find_or_create_action_log(
      account: account,
      profile: profile,
      action: "fetch_profile_details",
      profile_action_log_id: profile_action_log_id
    )
    action_log.mark_running!(extra_metadata: { queue_name: queue_name, active_job_id: job_id })

    client = Instagram::Client.new(account: account)
    details = fetch_profile_details_with_messageability(client: client, username: profile.username)
    normalized_pic_url = Instagram::AvatarUrlNormalizer.normalize(details[:profile_pic_url])
    followers_count = normalize_count(details[:followers_count])

    # Remember the previous post timestamp so we can detect a new post below.
    prev_last_post_at = profile.last_post_at
    # `.presence || existing` keeps current values when the API omits a field.
    profile.update!(
      display_name: details[:display_name].presence || profile.display_name,
      profile_pic_url: normalized_pic_url.presence || profile.profile_pic_url,
      ig_user_id: details[:ig_user_id].presence || profile.ig_user_id,
      bio: details[:bio].presence || profile.bio,
      followers_count: followers_count || profile.followers_count,
      can_message: details[:can_message],
      restriction_reason: details[:restriction_reason],
      dm_interaction_state: details[:dm_state].to_s.presence || (details[:can_message] ? "messageable" : "unavailable"),
      dm_interaction_reason: details[:dm_reason].to_s.presence || details[:restriction_reason].to_s,
      dm_interaction_checked_at: Time.current,
      dm_interaction_retry_after_at: details[:dm_retry_after_at],
      last_post_at: details[:last_post_at].presence || profile.last_post_at
    )

    profile.recompute_last_active!
    profile.save!

    apply_scan_exclusion_tag!(profile: profile, details: details)

    # Record post activity (best-effort from API profile payload).
    if profile.last_post_at.present? && (prev_last_post_at.nil? || profile.last_post_at > prev_last_post_at)
      # Fall back to a timestamp-derived id when no shortcode is available.
      eid =
        details[:latest_post_shortcode].presence ||
        "post:#{profile.last_post_at.to_i}"
      profile.record_event!(
        kind: "post_detected",
        external_id: eid,
        occurred_at: profile.last_post_at,
        metadata: { source: "profile_page" }
      )
    end

    # If avatar URL changed (or we never downloaded an attachment), refresh in the background.
    new_url = Instagram::AvatarUrlNormalizer.normalize(profile.profile_pic_url)
    if new_url.present? && (profile.avatar.blank? || avatar_fp(new_url) != profile.avatar_url_fingerprint.to_s)
      avatar_log = profile.instagram_profile_action_logs.create!(
        instagram_account: account,
        action: "sync_avatar",
        status: "queued",
        trigger_source: "job",
        occurred_at: Time.current,
        metadata: { triggered_by: self.class.name, reason: "profile_pic_changed" }
      )
      avatar_job = DownloadInstagramProfileAvatarJob.perform_later(
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        force: false,
        profile_action_log_id: avatar_log.id
      )
      avatar_log.update!(active_job_id: avatar_job.job_id, queue_name: avatar_job.queue_name)
    elsif new_url.blank? && profile.profile_pic_url.present?
      # URL no longer normalizes to anything usable: clear it and record the loss.
      profile.update!(profile_pic_url: nil, avatar_url_fingerprint: nil, avatar_synced_at: Time.current)
      profile.record_event!(kind: "avatar_missing", external_id: "avatar_missing:#{Time.current.utc.to_date.iso8601}", metadata: { source: "profile_page" })
    end

    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "notice", message: "Fetched profile details for #{profile.username}." }
    )
    action_log.mark_succeeded!(
      extra_metadata: { can_message: profile.can_message, last_post_at: profile.last_post_at&.iso8601 },
      log_text: "Fetched profile details and updated profile attributes"
    )
  rescue StandardError => e
    # Notify the UI, fail the action log, then re-raise for the job backend.
    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "alert", message: "Profile fetch failed: #{e.message}" }
    )
    action_log&.mark_failed!(error_message: e.message, extra_metadata: { active_job_id: job_id })
    raise
  end

  private

  # Reuses the given action log when it exists; otherwise creates a fresh
  # queued log row tied to this job.
  def find_or_create_action_log(account:, profile:, action:, profile_action_log_id:)
    log = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id) if profile_action_log_id.present?
    return log if log

    profile.instagram_profile_action_logs.create!(
      instagram_account: account,
      action: action,
      status: "queued",
      trigger_source: "job",
      occurred_at: Time.current,
      active_job_id: job_id,
      queue_name: queue_name,
      metadata: { created_by: self.class.name }
    )
  end

  # Fingerprints an avatar URL by host+path only, so rotating query params
  # (CDN signatures/expiries) don't register as a changed avatar.
  def avatar_fp(url)
    url = CGI.unescapeHTML(url.to_s)
    uri = URI.parse(url)
    base = "#{uri.host}#{uri.path}"
    Digest::SHA256.hexdigest(base)
  rescue StandardError
    # Unparseable URL: hash the raw string instead.
    Digest::SHA256.hexdigest(url.to_s)
  end

  # Strict integer parse: returns nil unless the value is all digits
  # (avoids to_i silently turning junk into 0).
  def normalize_count(value)
    text = value.to_s.strip
    return nil unless text.match?(/\A\d+\z/)

    text.to_i
  rescue StandardError
    nil
  end

  # Prefers the client's combined details+messageability call; falls back to
  # separate calls (with a structured warn) for older client implementations.
  # Always returns a symbol-keyed Hash.
  def fetch_profile_details_with_messageability(client:, username:)
    if client.respond_to?(:fetch_profile_details_and_verify_messageability!)
      result = client.fetch_profile_details_and_verify_messageability!(username: username)
      return result.is_a?(Hash) ? result.symbolize_keys : {}
    end

    Ops::StructuredLogger.warn(
      event: "profile_fetch_details.fallback_missing_combined_method",
      payload: { username: username.to_s }
    )
    details = client.fetch_profile_details!(username: username)
    details_hash = details.is_a?(Hash) ? details : {}
    eligibility =
      if client.respond_to?(:verify_messageability!)
        value = client.verify_messageability!(username: username)
        value.is_a?(Hash) ? value : {}
      else
        {}
      end

    details_hash.symbolize_keys.merge(eligibility.symbolize_keys)
  end

  # Applies/clears the scan-exclusion tag from the policy decision.
  # Best-effort: any error is swallowed so tagging never fails the fetch.
  def apply_scan_exclusion_tag!(profile:, details:)
    decision = Instagram::ProfileScanPolicy.new(profile: profile, profile_details: details).decision
    if decision[:reason_code].to_s == "non_personal_profile_page" || decision[:reason_code].to_s == "scan_excluded_tag"
      Instagram::ProfileScanPolicy.mark_scan_excluded!(profile: profile)
      return
    end

    Instagram::ProfileScanPolicy.clear_scan_excluded!(profile: profile) unless decision[:skip_scan]
  rescue StandardError
    nil
  end
end
-
# Finalizer for the per-post AI analysis pipeline. Every pipeline step enqueues
# one of these; a short metadata lock elects a single winner, which then
# enqueues the dependent metadata step, fails stalled steps, and — once all
# required steps are terminal — folds step outputs into the post's analysis and
# marks the pipeline run completed or failed. Re-enqueues itself with backoff
# while steps are still in flight.
class FinalizePostAnalysisPipelineJob < PostAnalysisPipelineJob
  queue_as :ai_visual_queue

  # Max times the finalizer re-polls before declaring a pipeline timeout.
  MAX_FINALIZE_ATTEMPTS = ENV.fetch("AI_PIPELINE_FINALIZE_ATTEMPTS", 30).to_i.clamp(5, 120)
  # Duration of the metadata lock that serializes concurrent finalizers.
  FINALIZER_LOCK_SECONDS = ENV.fetch("AI_PIPELINE_FINALIZER_LOCK_SECONDS", 4).to_i.clamp(2, 30)
  # A queued/running step older than this is force-failed as stalled.
  STEP_STALL_TIMEOUT_SECONDS = ENV.fetch("AI_PIPELINE_STEP_STALL_TIMEOUT_SECONDS", 180).to_i.clamp(45, 1800)

  # @param attempts [Integer] poll count so far; drives backoff and the timeout cutoff
  def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, pipeline_run_id:, attempts: 0)
    context = load_pipeline_context!(
      instagram_account_id: instagram_account_id,
      instagram_profile_id: instagram_profile_id,
      instagram_profile_post_id: instagram_profile_post_id,
      pipeline_run_id: pipeline_run_id
    )
    return unless context

    account = context[:account]
    profile = context[:profile]
    post = context[:post]
    pipeline_state = context[:pipeline_state]
    # Another finalizer already finished this run; nothing left to do.
    if pipeline_state.pipeline_terminal?(run_id: pipeline_run_id)
      Ops::StructuredLogger.info(
        event: "ai.pipeline.finalizer.skipped_terminal",
        payload: {
          active_job_id: job_id,
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          instagram_profile_post_id: post.id,
          pipeline_run_id: pipeline_run_id
        }
      )
      return
    end

    # Only one finalizer may proceed at a time (metadata lock below).
    return unless acquire_finalizer_slot?(post: post, pipeline_run_id: pipeline_run_id, attempts: attempts)

    maybe_enqueue_metadata_step!(context: context, pipeline_run_id: pipeline_run_id)
    mark_stalled_steps_failed!(context: context, pipeline_run_id: pipeline_run_id)

    unless pipeline_state.all_required_steps_terminal?(run_id: pipeline_run_id)
      # Steps still pending: either give up (timeout) or re-poll with backoff.
      if attempts.to_i >= MAX_FINALIZE_ATTEMPTS
        finalize_as_failed!(
          post: post,
          pipeline_state: pipeline_state,
          pipeline_run_id: pipeline_run_id,
          reason: "pipeline_timeout"
        )
        return
      end

      wait_seconds = finalize_poll_delay_seconds(attempts: attempts)
      self.class.set(wait: wait_seconds.seconds).perform_later(
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        instagram_profile_post_id: post.id,
        pipeline_run_id: pipeline_run_id,
        attempts: attempts.to_i + 1
      )
      return
    end

    pipeline = pipeline_state.pipeline_for(run_id: pipeline_run_id)
    required_steps = Array(pipeline["required_steps"]).map(&:to_s)
    visual_status = pipeline.dig("steps", "visual", "status").to_s
    succeeded_steps = required_steps.select do |step|
      pipeline.dig("steps", step, "status").to_s == "succeeded"
    end
    # The visual step is decisive when required; otherwise any success counts.
    overall_status =
      if required_steps.include?("visual")
        visual_status == "succeeded" ? "completed" : "failed"
      else
        succeeded_steps.any? ? "completed" : "failed"
      end

    finalize_post_record!(post: post, pipeline: pipeline, overall_status: overall_status)

    pipeline_state.mark_pipeline_finished!(
      run_id: pipeline_run_id,
      status: overall_status,
      details: {
        finalized_by: self.class.name,
        finalized_at: Time.current.iso8601(3),
        attempts: attempts.to_i,
        visual_status: visual_status
      }
    )

    notification_kind = overall_status == "completed" ? "notice" : "alert"
    notification_message =
      if overall_status == "completed"
        "Profile post analyzed: #{post.shortcode}."
      else
        "Profile post analysis degraded/failed for #{post.shortcode}."
      end

    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: notification_kind, message: notification_message }
    )
  rescue StandardError => e
    # Fail the run (best-effort) and re-raise for the job backend's retry logic.
    finalize_as_failed!(
      post: context&.dig(:post),
      pipeline_state: context&.dig(:pipeline_state),
      pipeline_run_id: pipeline_run_id,
      reason: format_error(e)
    )
    raise
  end

  private

  # Elects this finalizer via a short lock stored in the post's pipeline
  # metadata; returns false when another finalizer holds the lock or the run id
  # no longer matches. On unexpected errors it returns true (fail open) so a
  # broken lock record cannot permanently block finalization.
  def acquire_finalizer_slot?(post:, pipeline_run_id:, attempts:)
    now = Time.current
    acquired = false

    # Every pipeline step enqueues a finalizer; this short lock serializes metadata writes.
    post.with_lock do
      metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
      pipeline = metadata["ai_pipeline"]
      unless pipeline.is_a?(Hash) && pipeline["run_id"].to_s == pipeline_run_id.to_s
        acquired = false
        next
      end

      finalizer = pipeline["finalizer"].is_a?(Hash) ? pipeline["finalizer"] : {}
      lock_until = parse_time(finalizer["lock_until"])
      if lock_until.present? && lock_until > now
        acquired = false
        next
      end

      finalizer["lock_until"] = (now + FINALIZER_LOCK_SECONDS.seconds).iso8601(3)
      finalizer["last_started_at"] = now.iso8601(3)
      finalizer["last_job_id"] = job_id
      finalizer["last_attempt"] = attempts.to_i
      pipeline["finalizer"] = finalizer
      metadata["ai_pipeline"] = pipeline
      post.update!(metadata: metadata)
      acquired = true
    end

    acquired
  rescue StandardError
    true
  end

  # Step-function backoff for re-polling: 5s → 10s → 15s → 20s → 30s.
  def finalize_poll_delay_seconds(attempts:)
    value = attempts.to_i
    return 5 if value < 3
    return 10 if value < 8
    return 15 if value < 14
    return 20 if value < 20

    30
  end

  # Lenient ISO8601-ish parse; nil on blank or unparseable input.
  def parse_time(value)
    return nil if value.to_s.blank?

    Time.zone.parse(value.to_s)
  rescue StandardError
    nil
  end

  # Enqueues the metadata-tagging step once its prerequisites (the core
  # extraction steps) are terminal; marks it queued, or failed if enqueueing
  # itself raises.
  def maybe_enqueue_metadata_step!(context:, pipeline_run_id:)
    pipeline_state = context[:pipeline_state]
    return unless pipeline_state.required_step_pending?(run_id: pipeline_run_id, step: "metadata")
    # Metadata tagging depends on outputs from core extraction steps.
    return unless pipeline_state.core_steps_terminal?(run_id: pipeline_run_id)

    job = ProcessPostMetadataTaggingJob.perform_later(
      instagram_account_id: context[:account].id,
      instagram_profile_id: context[:profile].id,
      instagram_profile_post_id: context[:post].id,
      pipeline_run_id: pipeline_run_id
    )

    pipeline_state.mark_step_queued!(
      run_id: pipeline_run_id,
      step: "metadata",
      queue_name: job.queue_name,
      active_job_id: job.job_id,
      result: {
        enqueued_by: self.class.name,
        enqueued_at: Time.current.iso8601(3)
      }
    )
  rescue StandardError => e
    pipeline_state.mark_step_completed!(
      run_id: pipeline_run_id,
      step: "metadata",
      status: "failed",
      error: format_error(e),
      result: {
        reason: "metadata_enqueue_failed"
      }
    )
  end

  # Merges step outputs (OCR, video processing) from the post's metadata into
  # its analysis hash and updates ai_status/analyzed_at per +overall_status+.
  def finalize_post_record!(post:, pipeline:, overall_status:)
    analysis = post.analysis.is_a?(Hash) ? post.analysis.deep_dup : {}
    metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}

    ocr_meta = metadata["ocr_analysis"].is_a?(Hash) ? metadata["ocr_analysis"] : {}
    if ocr_meta["ocr_text"].to_s.present?
      analysis["ocr_text"] = ocr_meta["ocr_text"]
      analysis["ocr_blocks"] = Array(ocr_meta["ocr_blocks"]).first(40)
    end

    video_meta = metadata["video_processing"].is_a?(Hash) ? metadata["video_processing"] : {}
    if video_meta.present?
      analysis["video_processing_mode"] = video_meta["processing_mode"].to_s if video_meta["processing_mode"].to_s.present?
      analysis["video_static_detected"] = ActiveModel::Type::Boolean.new.cast(video_meta["static"]) if video_meta.key?("static")
      analysis["video_semantic_route"] = video_meta["semantic_route"].to_s if video_meta["semantic_route"].to_s.present?
      analysis["video_duration_seconds"] = video_meta["duration_seconds"] if video_meta.key?("duration_seconds")
      analysis["video_context_summary"] = video_meta["context_summary"].to_s if video_meta["context_summary"].to_s.present?
      analysis["transcript"] = video_meta["transcript"].to_s if video_meta["transcript"].to_s.present?
      analysis["video_topics"] = normalize_string_array(video_meta["topics"], limit: 40)
      analysis["video_objects"] = normalize_string_array(video_meta["objects"], limit: 50)
      analysis["video_scenes"] = Array(video_meta["scenes"]).select { |row| row.is_a?(Hash) }.first(50)
      analysis["video_hashtags"] = normalize_string_array(video_meta["hashtags"], limit: 50)
      analysis["video_mentions"] = normalize_string_array(video_meta["mentions"], limit: 50)
      analysis["video_profile_handles"] = normalize_string_array(video_meta["profile_handles"], limit: 50)

      # Video-derived signals are merged into (not replacing) the top-level lists.
      analysis["topics"] = merge_string_array(analysis["topics"], video_meta["topics"], limit: 40)
      analysis["objects"] = merge_string_array(analysis["objects"], video_meta["objects"], limit: 50)
      analysis["hashtags"] = merge_string_array(analysis["hashtags"], video_meta["hashtags"], limit: 50)
      analysis["mentions"] = merge_string_array(analysis["mentions"], video_meta["mentions"], limit: 50)

      # Video OCR is only a fallback when the dedicated OCR step produced nothing.
      if analysis["ocr_text"].to_s.blank? && video_meta["ocr_text"].to_s.present?
        analysis["ocr_text"] = video_meta["ocr_text"].to_s
      end
      if Array(analysis["ocr_blocks"]).empty?
        analysis["ocr_blocks"] = Array(video_meta["ocr_blocks"]).select { |row| row.is_a?(Hash) }.first(40)
      end
    end

    metadata["ai_pipeline"] = pipeline

    if overall_status == "completed"
      metadata.delete("ai_pipeline_failure")
      post.update!(
        analysis: analysis,
        metadata: metadata,
        ai_status: "analyzed",
        analyzed_at: Time.current
      )
    else
      post.update!(
        analysis: analysis,
        metadata: metadata,
        ai_status: "failed",
        analyzed_at: nil
      )
    end
  end

  # Force-fails required steps stuck in queued/running longer than
  # STEP_STALL_TIMEOUT_SECONDS so the run can reach a terminal state.
  # Best-effort: any error here is swallowed.
  def mark_stalled_steps_failed!(context:, pipeline_run_id:)
    pipeline_state = context[:pipeline_state]
    pipeline = pipeline_state.pipeline_for(run_id: pipeline_run_id)
    return unless pipeline.is_a?(Hash)

    required_steps = Array(pipeline["required_steps"]).map(&:to_s)
    return if required_steps.empty?

    now = Time.current
    required_steps.each do |step|
      row = pipeline.dig("steps", step)
      next unless row.is_a?(Hash)

      status = row["status"].to_s
      next unless status.in?(%w[queued running])

      age_seconds = step_age_seconds(step_row: row, pipeline: pipeline, now: now)
      next unless age_seconds
      next if age_seconds < STEP_STALL_TIMEOUT_SECONDS

      pipeline_state.mark_step_completed!(
        run_id: pipeline_run_id,
        step: step,
        status: "failed",
        error: "step_stalled_timeout: status=#{status} age_seconds=#{age_seconds.to_i}",
        result: {
          reason: "step_stalled_timeout",
          previous_status: status,
          age_seconds: age_seconds.to_i,
          timeout_seconds: STEP_STALL_TIMEOUT_SECONDS
        }
      )

      Ops::StructuredLogger.warn(
        event: "ai.pipeline.step_stalled",
        payload: {
          active_job_id: job_id,
          instagram_account_id: context[:account].id,
          instagram_profile_id: context[:profile].id,
          instagram_profile_post_id: context[:post].id,
          pipeline_run_id: pipeline_run_id,
          step: step,
          previous_status: status,
          age_seconds: age_seconds.to_i,
          timeout_seconds: STEP_STALL_TIMEOUT_SECONDS
        }
      )
    end
  rescue StandardError
    nil
  end

  # Age of a step in seconds, using the best available timestamp (started_at,
  # then enqueue time, then creation/update times); nil when none parse.
  def step_age_seconds(step_row:, pipeline:, now:)
    reference =
      parse_time(step_row["started_at"]) ||
      parse_time(step_row.dig("result", "enqueued_at")) ||
      parse_time(step_row["created_at"]) ||
      parse_time(pipeline["updated_at"]) ||
      parse_time(pipeline["created_at"])
    return nil unless reference

    (now - reference).to_f
  rescue StandardError
    nil
  end

  # Marks the post and pipeline run failed with +reason+. Best-effort (never
  # raises); no-op when +post+ is nil (e.g. context load failed).
  def finalize_as_failed!(post:, pipeline_state:, pipeline_run_id:, reason:)
    return unless post

    metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
    metadata["ai_pipeline_failure"] = {
      reason: reason.to_s,
      failed_at: Time.current.iso8601(3),
      source: self.class.name
    }

    post.update!(metadata: metadata, ai_status: "failed", analyzed_at: nil)

    pipeline_state&.mark_pipeline_finished!(
      run_id: pipeline_run_id,
      status: "failed",
      details: {
        reason: reason.to_s,
        finalized_at: Time.current.iso8601(3)
      }
    )
  rescue StandardError
    nil
  end

  # Stringifies, strips, dedupes, and caps a list of values.
  def normalize_string_array(values, limit:)
    Array(values).map(&:to_s).map(&:strip).reject(&:blank?).uniq.first(limit)
  end

  # Union of two lists, normalized and capped.
  def merge_string_array(existing, incoming, limit:)
    normalize_string_array(Array(existing) + Array(incoming), limit: limit)
  end
end
-
# Generates an LLM comment for a story-archive InstagramProfileEvent.
# The provider is always forced to "local"; the caller-requested provider is
# kept only for logging. When the profile lacks enough analyzed context the
# job schedules a profile-history rebuild and registers itself as the resume
# job so comment generation can be retried once history exists.
class GenerateLlmCommentJob < ApplicationJob
  queue_as :ai

  # Preparation failure codes that justify a build-history fallback retry
  # (anything else is treated as a terminal skip).
  PROFILE_PREPARATION_RETRY_REASON_CODES = %w[
    latest_posts_not_analyzed
    insufficient_analyzed_posts
    no_recent_posts_available
    missing_structured_post_signals
    profile_preparation_failed
    profile_preparation_error
  ].freeze
  # Cap on build-history fallback retries; env-tunable, clamped to 1..10.
  PROFILE_PREPARATION_RETRY_MAX_ATTEMPTS = ENV.fetch("STORY_COMMENT_PROFILE_PREPARATION_RETRY_MAX_ATTEMPTS", 3).to_i.clamp(1, 10)

  retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 3
  retry_on Errno::ECONNREFUSED, Errno::ECONNRESET, wait: :polynomially_longer, attempts: 3

  # @param instagram_profile_event_id [Integer] event to comment on (must exist)
  # @param provider [String] requested provider; overridden to "local" below
  # @param model [String, nil] optional model override passed through
  # @param requested_by [String] audit label for logging/retry bookkeeping
  def perform(instagram_profile_event_id:, provider: "local", model: nil, requested_by: "system")
    requested_provider = provider.to_s
    # Force local generation regardless of what was requested; the original
    # request is still logged via requested_provider.
    provider = "local"
    event = InstagramProfileEvent.find(instagram_profile_event_id)
    return unless event.story_archive_item?
    account = event.instagram_profile&.instagram_account
    profile = event.instagram_profile

    # Idempotency: if a comment already exists, just normalize status and exit.
    if event.has_llm_generated_comment?
      event.update_columns(
        llm_comment_status: "completed",
        llm_comment_last_error: nil,
        updated_at: Time.current
      )

      Ops::StructuredLogger.info(
        event: "llm_comment.already_completed",
        payload: {
          event_id: event.id,
          instagram_profile_id: event.instagram_profile_id,
          requested_provider: requested_provider,
          requested_by: requested_by
        }
      )
      return
    end

    # Build (and persist a snapshot of) the grounded profile context; bail out
    # via the domain error when the profile is not ready yet.
    preparation = prepare_profile_context(profile: profile, account: account)
    persist_profile_preparation_snapshot(event: event, preparation: preparation)
    unless ActiveModel::Type::Boolean.new.cast(preparation[:ready_for_comment_generation] || preparation["ready_for_comment_generation"])
      reason_code = preparation[:reason_code].to_s.presence || preparation["reason_code"].to_s.presence || "profile_comment_preparation_not_ready"
      reason_text = preparation[:reason].to_s.presence || preparation["reason"].to_s.presence || "Profile context is not ready for grounded comment generation."
      raise InstagramProfileEvent::LocalStoryIntelligenceUnavailableError.new(
        reason_text,
        reason: reason_code,
        source: "profile_comment_preparation"
      )
    end

    event.mark_llm_comment_running!(job_id: job_id)
    result = event.generate_llm_comment!(provider: provider, model: model)

    Ops::StructuredLogger.info(
      event: "llm_comment.completed",
      payload: {
        event_id: event.id,
        instagram_profile_id: event.instagram_profile_id,
        provider: event.llm_comment_provider,
        requested_provider: requested_provider,
        model: event.llm_comment_model,
        relevance_score: event.llm_comment_relevance_score,
        requested_by: requested_by,
        source: result[:source]
      }
    )
  rescue InstagramProfileEvent::LocalStoryIntelligenceUnavailableError => e
    # Context not ready: mark skipped, optionally queue a build-history
    # fallback, and re-queue this generation when the fallback was accepted.
    event&.mark_llm_comment_skipped!(message: e.message, reason: e.reason, source: e.source)
    retry_result = schedule_build_history_retry_if_needed(
      event: event,
      reason_code: e.reason,
      requested_provider: requested_provider,
      model: model,
      requested_by: requested_by
    )
    event&.queue_llm_comment_generation!(job_id: retry_result[:job_id]) if retry_result[:queued]

    Ops::StructuredLogger.warn(
      event: "llm_comment.skipped_no_context",
      payload: {
        event_id: event&.id,
        instagram_profile_id: event&.instagram_profile_id,
        provider: provider,
        requested_provider: requested_provider,
        model: model,
        requested_by: requested_by,
        reason: e.reason,
        source: e.source,
        error_message: e.message,
        retry_queued: ActiveModel::Type::Boolean.new.cast(retry_result[:queued]),
        retry_reason: retry_result[:reason].to_s.presence,
        retry_job_id: retry_result[:job_id].to_s.presence,
        retry_next_run_at: retry_result[:next_run_at].to_s.presence
      }
    )
  rescue StandardError => e
    event&.mark_llm_comment_failed!(error: e)

    Ops::StructuredLogger.error(
      event: "llm_comment.failed",
      payload: {
        event_id: event&.id,
        instagram_profile_id: event&.instagram_profile_id,
        provider: provider,
        requested_provider: requested_provider,
        model: model,
        requested_by: requested_by,
        error_class: e.class.name,
        error_message: e.message
      }
    )

    # Re-raise so the retry_on policies above can reschedule the job.
    raise
  end

  private

  # Runs the preparation service; converts missing records or service errors
  # into a not-ready result hash instead of raising.
  def prepare_profile_context(profile:, account:)
    return { ready_for_comment_generation: false, reason_code: "profile_missing", reason: "Profile missing for event." } unless profile && account

    Ai::ProfileCommentPreparationService.new(account: account, profile: profile).prepare!
  rescue StandardError => e
    {
      ready_for_comment_generation: false,
      reason_code: "profile_preparation_error",
      reason: e.message.to_s,
      error_class: e.class.name
    }
  end

  # Stores the preparation result under llm_comment_metadata for later
  # inspection. Best-effort: failures are swallowed.
  def persist_profile_preparation_snapshot(event:, preparation:)
    return unless event
    return unless preparation.is_a?(Hash)

    existing = event.llm_comment_metadata.is_a?(Hash) ? event.llm_comment_metadata.deep_dup : {}
    existing["profile_comment_preparation"] = preparation
    event.update_columns(llm_comment_metadata: existing, updated_at: Time.current)
  rescue StandardError
    nil
  end

  # Enqueues a BuildInstagramProfileHistoryJob with this job registered as the
  # resume job, bookkeeping attempt counts in llm_comment_metadata. Returns a
  # result hash with :queued plus reason/job details; never raises.
  def schedule_build_history_retry_if_needed(event:, reason_code:, requested_provider:, model:, requested_by:)
    return { queued: false, reason: "event_missing" } unless event
    return { queued: false, reason: "reason_not_retryable" } unless PROFILE_PREPARATION_RETRY_REASON_CODES.include?(reason_code.to_s)

    metadata = event.llm_comment_metadata.is_a?(Hash) ? event.llm_comment_metadata.deep_dup : {}
    retry_state = metadata["profile_preparation_retry"].is_a?(Hash) ? metadata["profile_preparation_retry"].deep_dup : {}
    attempts = retry_state["attempts"].to_i
    return { queued: false, reason: "retry_attempts_exhausted" } if attempts >= PROFILE_PREPARATION_RETRY_MAX_ATTEMPTS

    profile = event.instagram_profile
    account = profile&.instagram_account
    return { queued: false, reason: "profile_missing" } unless profile && account

    history_result = BuildInstagramProfileHistoryJob.enqueue_with_resume_if_needed!(
      account: account,
      profile: profile,
      trigger_source: "story_comment_preparation_fallback",
      requested_by: self.class.name,
      resume_job: {
        job_class: self.class,
        job_kwargs: {
          instagram_profile_event_id: event.id,
          provider: requested_provider,
          model: model,
          requested_by: "profile_preparation_retry:#{requested_by}"
        }
      }
    )
    return { queued: false, reason: history_result[:reason] } unless ActiveModel::Type::Boolean.new.cast(history_result[:accepted])

    # Only count an attempt once the history job was actually accepted.
    retry_state["attempts"] = attempts + 1
    retry_state["last_reason_code"] = reason_code.to_s
    retry_state["last_skipped_at"] = Time.current.iso8601(3)
    retry_state["last_enqueued_at"] = Time.current.iso8601(3)
    retry_state["next_run_at"] = history_result[:next_run_at].to_s.presence
    retry_state["job_id"] = history_result[:job_id].to_s.presence
    retry_state["build_history_action_log_id"] = history_result[:action_log_id].to_i if history_result[:action_log_id].present?
    retry_state["source"] = self.class.name
    retry_state["mode"] = "build_history_fallback"
    metadata["profile_preparation_retry"] = retry_state
    event.update_columns(llm_comment_metadata: metadata, updated_at: Time.current)

    {
      queued: true,
      reason: "build_history_fallback_registered",
      job_id: history_result[:job_id].to_s,
      action_log_id: history_result[:action_log_id],
      next_run_at: history_result[:next_run_at].to_s
    }
  rescue StandardError => e
    {
      queued: false,
      reason: "retry_enqueue_failed",
      error_class: e.class.name,
      error_message: e.message.to_s
    }
  end
end
-
# Generates and attaches a poster-frame preview image for a profile post
# whose media is a video, stamping bookkeeping metadata on success.
#
# FIX: retry_on is built on rescue_from, whose handlers are searched in the
# REVERSE order of declaration. The generic StandardError policy therefore
# must be declared BEFORE the specific ActiveStorage::PreviewError policy —
# otherwise StandardError matches first and PreviewError never gets its own
# (3-attempt, polynomial backoff) retry schedule.
class GenerateProfilePostPreviewImageJob < ApplicationJob
  queue_as :frame_generation

  # Generic fallback first (lowest precedence)...
  retry_on StandardError, wait: 10.seconds, attempts: 2
  # ...specific preview failures last so they win the handler lookup.
  retry_on ActiveStorage::PreviewError, wait: :polynomially_longer, attempts: 3

  # @param instagram_profile_post_id [Integer] post to generate a preview for;
  #   silently no-ops when the post is missing, has no video media, or already
  #   has a preview image.
  def perform(instagram_profile_post_id:)
    post = InstagramProfilePost.find_by(id: instagram_profile_post_id)
    return unless post&.media&.attached?
    return if post.preview_image.attached?
    # Only videos need a generated poster frame.
    return unless post.media.blob&.content_type.to_s.start_with?("video/")

    preview = post.media.preview(resize_to_limit: [ 640, 640 ]).processed
    preview_image = preview.image
    return unless preview_image&.attached?

    # Lock and re-check to avoid double-attaching under concurrent runs
    # (with_lock reloads the record before the check).
    post.with_lock do
      return if post.preview_image.attached?

      post.preview_image.attach(preview_image.blob)
      metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
      post.update!(
        metadata: metadata.merge(
          "preview_image_status" => "attached",
          "preview_image_source" => "active_storage_preview_job",
          "preview_image_attached_at" => Time.current.utc.iso8601(3)
        )
      )
    end

    Rails.logger.info("[GenerateProfilePostPreviewImageJob] attached preview_image post_id=#{post.id} blob_id=#{preview_image.blob.id}")
  rescue StandardError => e
    Rails.logger.warn("[GenerateProfilePostPreviewImageJob] failed post_id=#{instagram_profile_post_id}: #{e.class}: #{e.message}")
    raise
  end
end
-
# Generates and attaches a poster-frame preview image for a story event
# (InstagramProfileEvent) whose media is a video.
#
# FIX: retry_on is built on rescue_from, whose handlers are searched in the
# REVERSE order of declaration. The generic StandardError policy therefore
# must be declared BEFORE the specific ActiveStorage::PreviewError policy —
# otherwise StandardError matches first and PreviewError never gets its own
# (3-attempt, polynomial backoff) retry schedule.
class GenerateStoryPreviewImageJob < ApplicationJob
  queue_as :frame_generation

  # Generic fallback first (lowest precedence)...
  retry_on StandardError, wait: 10.seconds, attempts: 2
  # ...specific preview failures last so they win the handler lookup.
  retry_on ActiveStorage::PreviewError, wait: :polynomially_longer, attempts: 3

  # @param instagram_profile_event_id [Integer] event to generate a preview
  #   for; silently no-ops when the event is missing, has no video media, or
  #   already has a preview image.
  def perform(instagram_profile_event_id:)
    event = InstagramProfileEvent.find_by(id: instagram_profile_event_id)
    return unless event&.media&.attached?
    return if event.preview_image.attached?
    # Only videos need a generated poster frame.
    return unless event.media.blob&.content_type.to_s.start_with?("video/")

    preview = event.media.preview(resize_to_limit: [640, 640]).processed
    preview_image = preview.image
    return unless preview_image&.attached?

    # Lock and re-check to avoid double-attaching under concurrent runs.
    event.with_lock do
      return if event.preview_image.attached?

      event.preview_image.attach(preview_image.blob)
    end

    Rails.logger.info("[GenerateStoryPreviewImageJob] attached preview_image event_id=#{event.id} blob_id=#{preview_image.blob.id}")
  rescue StandardError => e
    Rails.logger.warn("[GenerateStoryPreviewImageJob] failed event_id=#{instagram_profile_event_id}: #{e.class}: #{e.message}")
    raise
  end
end
-
1
# Shared base class for per-step post-analysis pipeline jobs. Provides
# context loading, finalizer enqueueing, and error formatting helpers.
class PostAnalysisPipelineJob < ApplicationJob
  private

  # Resolves the account/profile/post chain plus the pipeline state and run
  # payload. Returns nil when any record is missing (RecordNotFound) or the
  # run id does not resolve to a pipeline.
  def load_pipeline_context!(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, pipeline_run_id:)
    found_account = InstagramAccount.find(instagram_account_id)
    found_profile = found_account.instagram_profiles.find(instagram_profile_id)
    found_post = found_profile.instagram_profile_posts.find(instagram_profile_post_id)
    state = Ai::PostAnalysisPipelineState.new(post: found_post)
    run = state.pipeline_for(run_id: pipeline_run_id)
    return nil unless run

    {
      account: found_account,
      profile: found_profile,
      post: found_post,
      pipeline_state: state,
      pipeline: run
    }
  rescue ActiveRecord::RecordNotFound
    nil
  end

  # Best-effort enqueue of the pipeline finalizer; enqueue failures are
  # swallowed so they never mask the step's own outcome.
  def enqueue_pipeline_finalizer(account:, profile:, post:, pipeline_run_id:, attempts: 0)
    FinalizePostAnalysisPipelineJob.perform_later(
      instagram_account_id: account.id,
      instagram_profile_id: profile.id,
      instagram_profile_post_id: post.id,
      pipeline_run_id: pipeline_run_id,
      attempts: attempts
    )
  rescue StandardError
    nil
  end

  # Compact "Class: message" string, capped at 320 bytes for persistence.
  def format_error(error)
    rendered = "#{error.class}: #{error.message}"
    rendered.byteslice(0, 320)
  end
end
-
# Posts a prepared comment onto an Instagram media item on behalf of an
# account, records the outcome as a profile event and on the optional action
# log, and pushes a Turbo Stream notification for both success and failure.
class PostInstagramProfileCommentJob < ApplicationJob
  queue_as :messages

  # @param instagram_account_id [Integer] owning account (raises if missing)
  # @param instagram_profile_id [Integer] profile scoped under the account
  # @param instagram_profile_post_id [Integer] post scoped under the profile
  # @param comment_text [String] text to publish
  # @param media_id [String, Integer] Instagram media identifier to comment on
  # @param profile_action_log_id [Integer, nil] optional action-log row to track
  def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, comment_text:, media_id:, profile_action_log_id: nil)
    account = InstagramAccount.find(instagram_account_id)
    profile = account.instagram_profiles.find(instagram_profile_id)
    post = profile.instagram_profile_posts.find(instagram_profile_post_id)
    # find_by: a nil/unknown action-log id is tolerated (action_log stays nil).
    action_log = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id)
    action_log&.mark_running!(extra_metadata: { queue_name: queue_name, active_job_id: job_id })

    result = Instagram::Client.new(account: account).post_comment_to_media!(
      media_id: media_id.to_s,
      shortcode: post.shortcode.to_s,
      comment_text: comment_text.to_s
    )

    # Microsecond timestamp in external_id keeps repeated sends on the same
    # media from colliding.
    profile.record_event!(
      kind: "post_comment_sent",
      external_id: "post_comment_sent:#{media_id}:#{Time.current.utc.iso8601(6)}",
      occurred_at: Time.current,
      metadata: {
        source: "profile_post_suggestion_modal",
        post_shortcode: post.shortcode,
        media_id: media_id.to_s,
        comment_text: comment_text.to_s,
        api_result: result
      }
    )

    action_log&.mark_succeeded!(
      extra_metadata: { post_shortcode: post.shortcode, media_id: media_id.to_s },
      log_text: "Comment posted on #{post.shortcode}"
    )

    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "notice", message: "Comment posted on #{post.shortcode}." }
    )
  rescue StandardError => e
    action_log&.mark_failed!(error_message: e.message, extra_metadata: { active_job_id: job_id, media_id: media_id.to_s })
    # Only broadcast the failure when the account lookup actually succeeded;
    # the error may have been raised before `account` was assigned.
    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "alert", message: "Comment post failed: #{e.message}" }
    ) if defined?(account) && account
    # Re-raise so ActiveJob records the failure.
    raise
  end
end
-
# Runs one continuous-processing cycle for an Instagram account. Guards with
# a DB-row lock based "running" claim (with heartbeat staleness detection),
# records a sync run, delegates the actual work to the processing
# coordinator, and applies jittered exponential-ish backoff on failure.
class ProcessInstagramAccountContinuouslyJob < ApplicationJob
  queue_as :sync

  # A "running" claim older than this (by heartbeat) is considered abandoned
  # and may be stolen by a new run.
  RUNNING_STALE_AFTER = 15.minutes

  retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 4
  retry_on Errno::ECONNREFUSED, Errno::ECONNRESET, wait: :polynomially_longer, attempts: 4

  # @param instagram_account_id [Integer] account to process (missing -> logged skip)
  # @param trigger_source [String] who triggered this cycle (audit only)
  def perform(instagram_account_id:, trigger_source: "scheduler")
    account = InstagramAccount.find_by(id: instagram_account_id)
    unless account
      Ops::StructuredLogger.info(
        event: "continuous_processing.skipped_missing_account",
        payload: {
          instagram_account_id: instagram_account_id,
          trigger_source: trigger_source
        }
      )
      return
    end

    return unless account.continuous_processing_enabled?

    # Respect the failure backoff window set by handle_failure!.
    if retry_backoff_active?(account)
      Ops::StructuredLogger.info(
        event: "continuous_processing.skipped_retry_backoff",
        payload: {
          account_id: account.id,
          retry_after_at: account.continuous_processing_retry_after_at&.iso8601,
          trigger_source: trigger_source
        }
      )
      return
    end

    acquired = claim_processing_lock!(account: account, trigger_source: trigger_source)
    return unless acquired

    run = account.sync_runs.create!(
      kind: "continuous_processing",
      status: "running",
      started_at: Time.current,
      stats: {
        trigger_source: trigger_source,
        pipeline_version: "continuous_processing_v1"
      }
    )

    stats = Pipeline::AccountProcessingCoordinator.new(
      account: account,
      trigger_source: trigger_source
    ).run!

    run.update!(
      status: "succeeded",
      finished_at: Time.current,
      stats: (run.stats || {}).merge(stats).merge(status: "succeeded")
    )

    # Release the claim and reset the failure/backoff counters.
    account.update!(
      continuous_processing_state: "idle",
      continuous_processing_last_finished_at: Time.current,
      continuous_processing_last_heartbeat_at: Time.current,
      continuous_processing_last_error: nil,
      continuous_processing_failure_count: 0,
      continuous_processing_retry_after_at: nil
    )

    Ops::StructuredLogger.info(
      event: "continuous_processing.completed",
      payload: {
        account_id: account.id,
        sync_run_id: run.id,
        trigger_source: trigger_source,
        enqueued_jobs: Array(stats[:enqueued_jobs]).size,
        skipped_jobs: Array(stats[:skipped_jobs]).size
      }
    )
  rescue StandardError => e
    # `account`/`run` may still be nil here depending on where the error was
    # raised; handle_failure! copes with both.
    handle_failure!(
      account: account,
      run: run,
      error: e,
      trigger_source: trigger_source,
      instagram_account_id: instagram_account_id
    )
    raise
  end

  private

  # True while a failure backoff window set by handle_failure! is still open.
  def retry_backoff_active?(account)
    account.continuous_processing_retry_after_at.present? && account.continuous_processing_retry_after_at > Time.current
  end

  # Atomically claims the per-account "running" state under a row lock.
  # Returns true when the claim succeeded; false when a fresh (non-stale)
  # run already holds it.
  def claim_processing_lock!(account:, trigger_source:)
    claimed = false

    account.with_lock do
      # A missing or old heartbeat means the previous claim was abandoned.
      stale = account.continuous_processing_last_heartbeat_at.blank? || account.continuous_processing_last_heartbeat_at < RUNNING_STALE_AFTER.ago

      if account.continuous_processing_state == "running" && !stale
        Ops::StructuredLogger.info(
          event: "continuous_processing.skipped_already_running",
          payload: {
            account_id: account.id,
            trigger_source: trigger_source,
            last_heartbeat_at: account.continuous_processing_last_heartbeat_at&.iso8601
          }
        )
        next
      end

      account.update!(
        continuous_processing_state: "running",
        continuous_processing_last_started_at: Time.current,
        continuous_processing_last_heartbeat_at: Time.current,
        continuous_processing_last_error: nil
      )

      claimed = true
    end

    claimed
  end

  # Records a failed cycle: bumps the failure counter, opens a backoff
  # window, releases the claim, fails the sync run (if any), and logs.
  def handle_failure!(account:, run:, error:, trigger_source:, instagram_account_id:)
    # Re-resolve the account when perform failed before its lookup succeeded.
    account ||= InstagramAccount.where(id: instagram_account_id).first

    return unless account

    account.with_lock do
      failures = account.continuous_processing_failure_count.to_i + 1
      retry_after = Time.current + failure_backoff_for(failures)

      account.update!(
        continuous_processing_state: "idle",
        continuous_processing_last_finished_at: Time.current,
        continuous_processing_last_heartbeat_at: Time.current,
        continuous_processing_last_error: "#{error.class}: #{error.message}",
        continuous_processing_failure_count: failures,
        continuous_processing_retry_after_at: retry_after
      )
    end

    run&.update!(
      status: "failed",
      finished_at: Time.current,
      error_message: error.message,
      stats: (run.stats || {}).merge(
        status: "failed",
        error_class: error.class.name,
        error_message: error.message
      )
    )

    Ops::StructuredLogger.error(
      event: "continuous_processing.failed",
      payload: {
        account_id: account.id,
        sync_run_id: run&.id,
        trigger_source: trigger_source,
        error_class: error.class.name,
        error_message: error.message,
        retry_after_at: account.continuous_processing_retry_after_at&.iso8601,
        failure_count: account.continuous_processing_failure_count
      }
    )
  end

  # Escalating backoff per consecutive failure, plus 0-90s of jitter to
  # spread retries across accounts.
  def failure_backoff_for(failure_count)
    base =
      case failure_count
      when 1 then 5.minutes
      when 2 then 15.minutes
      when 3 then 30.minutes
      when 4 then 1.hour
      else 3.hours
      end

    base + rand(0..90).seconds
  end
end
-
require "timeout"
-
-
# Runs face recognition for a single post as the "face" step of the
# post-analysis pipeline, then hands off to the pipeline finalizer from
# `ensure` unless the step/pipeline was already terminal.
class ProcessPostFaceAnalysisJob < PostAnalysisPipelineJob
  queue_as :ai_face_queue

  retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 3
  retry_on Errno::ECONNRESET, Errno::ECONNREFUSED, wait: :polynomially_longer, attempts: 3
  retry_on Timeout::Error, wait: :polynomially_longer, attempts: 2

  # @param instagram_account_id [Integer]
  # @param instagram_profile_id [Integer]
  # @param instagram_profile_post_id [Integer]
  # @param pipeline_run_id [String/Integer] run this step belongs to
  def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, pipeline_run_id:)
    enqueue_finalizer = true
    context = load_pipeline_context!(
      instagram_account_id: instagram_account_id,
      instagram_profile_id: instagram_profile_id,
      instagram_profile_post_id: instagram_profile_post_id,
      pipeline_run_id: pipeline_run_id
    )
    return unless context

    pipeline_state = context[:pipeline_state]
    post = context[:post]
    # Duplicate/late delivery guard: skip (and don't re-finalize) when the
    # run or this step already reached a terminal state.
    if pipeline_state.pipeline_terminal?(run_id: pipeline_run_id) || pipeline_state.step_terminal?(run_id: pipeline_run_id, step: "face")
      enqueue_finalizer = false
      Ops::StructuredLogger.info(
        event: "ai.face_analysis.skipped_terminal",
        payload: {
          active_job_id: job_id,
          instagram_account_id: context[:account].id,
          instagram_profile_id: context[:profile].id,
          instagram_profile_post_id: post.id,
          pipeline_run_id: pipeline_run_id
        }
      )
      return
    end

    pipeline_state.mark_step_running!(
      run_id: pipeline_run_id,
      step: "face",
      queue_name: queue_name,
      active_job_id: job_id
    )

    # Hard wall-clock cap on the recognition service; Timeout::Error is
    # retried by the policy above.
    result = Timeout.timeout(face_timeout_seconds) do
      PostFaceRecognitionService.new.process!(post: post)
    end

    pipeline_state.mark_step_completed!(
      run_id: pipeline_run_id,
      step: "face",
      status: "succeeded",
      result: {
        skipped: ActiveModel::Type::Boolean.new.cast(result[:skipped]),
        face_count: result[:face_count].to_i,
        reason: result[:reason].to_s,
        matched_people_count: Array(result[:matched_people]).length
      }
    )
  rescue StandardError => e
    # Best-effort failure record (context may be nil), then re-raise for the
    # retry policies.
    context&.dig(:pipeline_state)&.mark_step_completed!(
      run_id: pipeline_run_id,
      step: "face",
      status: "failed",
      error: format_error(e),
      result: {
        reason: "face_analysis_failed"
      }
    )
    raise
  ensure
    # Always advance the pipeline, whether this step succeeded or failed —
    # except when the step was already terminal (enqueue_finalizer = false).
    if context && enqueue_finalizer
      enqueue_pipeline_finalizer(
        account: context[:account],
        profile: context[:profile],
        post: context[:post],
        pipeline_run_id: pipeline_run_id
      )
    end
  end

  private

  # Timeout for the face service; env-tunable, clamped to 20..420 seconds.
  def face_timeout_seconds
    ENV.fetch("AI_FACE_TIMEOUT_SECONDS", 180).to_i.clamp(20, 420)
  end
end
-
1
# "metadata" step of the post-analysis pipeline: folds the face-recognition
# results into the post's analysis, syncs profile auto-tags, optionally runs
# comment generation, and (when the comment is blocked on incomplete profile
# history) schedules a build-history fallback retry.
#
# Refactor: the three near-identical task-flag predicates now share a single
# private helper (task_flag_enabled?); their signatures and behavior —
# including defaulting to true on missing flags or any error — are unchanged.
class ProcessPostMetadataTaggingJob < PostAnalysisPipelineJob
  queue_as :ai_metadata_queue

  # History-preparation failure codes that justify a build-history retry.
  PROFILE_INCOMPLETE_REASON_CODES = %w[
    latest_posts_not_analyzed
    insufficient_analyzed_posts
    no_recent_posts_available
    missing_structured_post_signals
    profile_preparation_failed
    profile_preparation_error
  ].freeze
  # Cap on comment fallback retries; env-tunable, clamped to 1..10.
  COMMENT_RETRY_MAX_ATTEMPTS = ENV.fetch("POST_COMMENT_RETRY_MAX_ATTEMPTS", 3).to_i.clamp(1, 10)

  # @param instagram_account_id [Integer]
  # @param instagram_profile_id [Integer]
  # @param instagram_profile_post_id [Integer]
  # @param pipeline_run_id [String/Integer] run this step belongs to
  def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, pipeline_run_id:)
    enqueue_finalizer = true
    context = load_pipeline_context!(
      instagram_account_id: instagram_account_id,
      instagram_profile_id: instagram_profile_id,
      instagram_profile_post_id: instagram_profile_post_id,
      pipeline_run_id: pipeline_run_id
    )
    return unless context

    account = context[:account]
    post = context[:post]
    profile = context[:profile]
    pipeline_state = context[:pipeline_state]
    # Duplicate/late delivery guard: skip (and don't re-finalize) when the
    # run or this step already reached a terminal state.
    if pipeline_state.pipeline_terminal?(run_id: pipeline_run_id) || pipeline_state.step_terminal?(run_id: pipeline_run_id, step: "metadata")
      enqueue_finalizer = false
      Ops::StructuredLogger.info(
        event: "ai.metadata_tagging.skipped_terminal",
        payload: {
          active_job_id: job_id,
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          instagram_profile_post_id: post.id,
          pipeline_run_id: pipeline_run_id
        }
      )
      return
    end

    pipeline_state.mark_step_running!(
      run_id: pipeline_run_id,
      step: "metadata",
      queue_name: queue_name,
      active_job_id: job_id
    )

    # Summarize face-recognition output (written by the face step) into the
    # post's analysis hash.
    analysis = post.analysis.is_a?(Hash) ? post.analysis.deep_dup : {}
    face_meta = post.metadata.is_a?(Hash) ? post.metadata.dig("face_recognition") : nil
    face_meta = {} unless face_meta.is_a?(Hash)
    matched_people = Array(face_meta["matched_people"])

    analysis["face_summary"] = {
      "face_count" => face_meta["face_count"].to_i,
      "owner_faces_count" => matched_people.count { |row| ActiveModel::Type::Boolean.new.cast(row["owner_match"] || row[:owner_match]) },
      "recurring_faces_count" => matched_people.count { |row| ActiveModel::Type::Boolean.new.cast(row["recurring_face"] || row[:recurring_face]) },
      "detection_source" => face_meta["detection_source"].to_s.presence,
      "participant_summary" => face_meta["participant_summary"].to_s.presence,
      "detection_reason" => face_meta["detection_reason"].to_s.presence,
      "detection_error" => face_meta["detection_error"].to_s.presence
    }.compact

    post.update!(analysis: analysis)

    Ai::ProfileAutoTagger.sync_from_post_analysis!(profile: profile, analysis: analysis)

    # Comment generation is gated by the run's task flags (default: enabled).
    comment_result =
      if comment_generation_enabled?(pipeline_state: pipeline_state, pipeline_run_id: pipeline_run_id)
        Ai::PostCommentGenerationService.new(
          account: account,
          profile: profile,
          post: post,
          enforce_required_evidence: comment_evidence_policy_enforced?(pipeline_state: pipeline_state, pipeline_run_id: pipeline_run_id)
        ).run!
      else
        {
          blocked: true,
          status: "disabled_by_task_flags",
          source: "policy",
          suggestions_count: 0,
          reason_code: "comments_disabled"
        }
      end

    retry_result =
      if comment_retry_enabled?(pipeline_state: pipeline_state, pipeline_run_id: pipeline_run_id)
        enqueue_comment_retry_if_needed!(
          account: account,
          profile: profile,
          post: post,
          comment_result: comment_result
        )
      else
        { queued: false, reason: "retry_disabled" }
      end

    pipeline_state.mark_step_completed!(
      run_id: pipeline_run_id,
      step: "metadata",
      status: "succeeded",
      result: {
        face_count: face_meta["face_count"].to_i,
        participant_summary_present: face_meta["participant_summary"].to_s.present?,
        comment_generation_status: comment_result[:status].to_s,
        comment_generation_blocked: ActiveModel::Type::Boolean.new.cast(comment_result[:blocked]),
        comment_generation_source: comment_result[:source].to_s,
        comment_suggestions_count: comment_result[:suggestions_count].to_i,
        comment_reason_code: comment_result[:reason_code].to_s.presence,
        comment_history_reason_code: comment_result[:history_reason_code].to_s.presence,
        comment_retry_queued: ActiveModel::Type::Boolean.new.cast(retry_result[:queued]),
        comment_retry_reason: retry_result[:reason].to_s.presence,
        comment_retry_job_id: retry_result[:job_id].to_s.presence,
        comment_retry_next_run_at: retry_result[:next_run_at].to_s.presence
      }
    )
  rescue StandardError => e
    # Best-effort failure record (context may be nil), then re-raise.
    context&.dig(:pipeline_state)&.mark_step_completed!(
      run_id: pipeline_run_id,
      step: "metadata",
      status: "failed",
      error: format_error(e),
      result: {
        reason: "metadata_tagging_failed"
      }
    )
    raise
  ensure
    # Always advance the pipeline unless the step was already terminal.
    if context && enqueue_finalizer
      enqueue_pipeline_finalizer(
        account: context[:account],
        profile: context[:profile],
        post: context[:post],
        pipeline_run_id: pipeline_run_id
      )
    end
  end

  private

  # Task-flag gate for comment generation (default true).
  def comment_generation_enabled?(pipeline_state:, pipeline_run_id:)
    task_flag_enabled?(pipeline_state: pipeline_state, pipeline_run_id: pipeline_run_id, flag: "generate_comments")
  end

  # Task-flag gate for the required-evidence policy (default true).
  def comment_evidence_policy_enforced?(pipeline_state:, pipeline_run_id:)
    task_flag_enabled?(pipeline_state: pipeline_state, pipeline_run_id: pipeline_run_id, flag: "enforce_comment_evidence_policy")
  end

  # Task-flag gate for the incomplete-profile retry fallback (default true).
  def comment_retry_enabled?(pipeline_state:, pipeline_run_id:)
    task_flag_enabled?(pipeline_state: pipeline_state, pipeline_run_id: pipeline_run_id, flag: "retry_on_incomplete_profile")
  end

  # Shared task-flag lookup: reads the run's "task_flags" hash and casts the
  # named flag to a boolean. Missing flag, malformed payload, or any error
  # all default to true (flags are opt-out).
  def task_flag_enabled?(pipeline_state:, pipeline_run_id:, flag:)
    pipeline = pipeline_state.pipeline_for(run_id: pipeline_run_id)
    flags = pipeline.is_a?(Hash) ? pipeline["task_flags"] : {}
    flags = {} unless flags.is_a?(Hash)

    if flags.key?(flag)
      ActiveModel::Type::Boolean.new.cast(flags[flag])
    else
      true
    end
  rescue StandardError
    true
  end

  # When comment generation was blocked on missing required evidence and the
  # post's comment policy says profile history is both not ready and
  # retryable, enqueue a build-history job with a resume analysis job, and
  # record the attempt in the post's comment_generation_policy metadata.
  # Returns a {queued:, reason:, ...} hash; never raises.
  def enqueue_comment_retry_if_needed!(account:, profile:, post:, comment_result:)
    return { queued: false, reason: "comment_not_blocked" } unless ActiveModel::Type::Boolean.new.cast(comment_result[:blocked])
    return { queued: false, reason: "reason_not_retryable" } unless comment_result[:reason_code].to_s == "missing_required_evidence"

    metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
    policy = metadata["comment_generation_policy"]
    return { queued: false, reason: "policy_missing" } unless policy.is_a?(Hash)
    return { queued: false, reason: "history_ready" } if ActiveModel::Type::Boolean.new.cast(policy["history_ready"])

    history_reason_code = policy["history_reason_code"].to_s
    return { queued: false, reason: "history_reason_not_retryable" } unless PROFILE_INCOMPLETE_REASON_CODES.include?(history_reason_code)

    retry_state = policy["retry_state"].is_a?(Hash) ? policy["retry_state"].deep_dup : {}
    attempts = retry_state["attempts"].to_i
    return { queued: false, reason: "retry_attempts_exhausted" } if attempts >= COMMENT_RETRY_MAX_ATTEMPTS

    build_history_result = BuildInstagramProfileHistoryJob.enqueue_with_resume_if_needed!(
      account: account,
      profile: profile,
      trigger_source: "post_metadata_comment_fallback",
      requested_by: self.class.name,
      resume_job: {
        job_class: AnalyzeInstagramProfilePostJob,
        job_kwargs: {
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          instagram_profile_post_id: post.id,
          pipeline_mode: "inline",
          # Resume runs only the metadata/comment portion of the pipeline.
          task_flags: {
            analyze_visual: false,
            analyze_faces: false,
            run_ocr: false,
            run_video: false,
            run_metadata: true,
            generate_comments: true,
            enforce_comment_evidence_policy: true,
            retry_on_incomplete_profile: true
          }
        }
      }
    )
    return { queued: false, reason: build_history_result[:reason] } unless ActiveModel::Type::Boolean.new.cast(build_history_result[:accepted])

    # Only count an attempt once the history job was actually accepted.
    retry_state["attempts"] = attempts + 1
    retry_state["last_reason_code"] = history_reason_code
    retry_state["last_blocked_at"] = Time.current.iso8601(3)
    retry_state["last_enqueued_at"] = Time.current.iso8601(3)
    retry_state["next_run_at"] = build_history_result[:next_run_at].to_s.presence
    retry_state["job_id"] = build_history_result[:job_id].to_s.presence
    retry_state["build_history_action_log_id"] = build_history_result[:action_log_id].to_i if build_history_result[:action_log_id].present?
    retry_state["source"] = self.class.name
    retry_state["mode"] = "build_history_fallback"

    policy["retry_state"] = retry_state
    policy["updated_at"] = Time.current.iso8601(3)
    metadata["comment_generation_policy"] = policy
    post.update!(metadata: metadata)

    {
      queued: true,
      reason: "build_history_fallback_registered",
      job_id: build_history_result[:job_id].to_s,
      action_log_id: build_history_result[:action_log_id],
      next_run_at: build_history_result[:next_run_at].to_s
    }
  rescue StandardError => e
    {
      queued: false,
      reason: "retry_enqueue_failed",
      error_class: e.class.name,
      error_message: e.message.to_s
    }
  end
end
-
require "timeout"
-
-
class ProcessPostOcrAnalysisJob < PostAnalysisPipelineJob
-
queue_as :ai_ocr_queue
-
-
MAX_DEFER_ATTEMPTS = ENV.fetch("AI_OCR_MAX_DEFER_ATTEMPTS", 4).to_i.clamp(1, 12)
-
-
retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 3
-
retry_on Errno::ECONNRESET, Errno::ECONNREFUSED, wait: :polynomially_longer, attempts: 3
-
retry_on Timeout::Error, wait: :polynomially_longer, attempts: 2
-
-
# "ocr" step of the post-analysis pipeline. Marks the step running, reuses
# OCR already captured by the face-recognition step when available,
# otherwise extracts text from the post's detection image under a timeout,
# persists the result, and records step completion. The finalizer is
# enqueued from `ensure` unless the step was already terminal or the
# context could not be loaded.
#
# @param defer_attempt [Integer] how many times this job has been deferred
#   by the resource guard (see resource_available?).
def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, pipeline_run_id:, defer_attempt: 0)
  enqueue_finalizer = true
  context = load_pipeline_context!(
    instagram_account_id: instagram_account_id,
    instagram_profile_id: instagram_profile_id,
    instagram_profile_post_id: instagram_profile_post_id,
    pipeline_run_id: pipeline_run_id
  )
  return unless context

  account = context[:account]
  post = context[:post]
  pipeline_state = context[:pipeline_state]
  # Duplicate/late delivery guard: skip (and don't re-finalize) when the run
  # or this step already reached a terminal state.
  if pipeline_state.pipeline_terminal?(run_id: pipeline_run_id) || pipeline_state.step_terminal?(run_id: pipeline_run_id, step: "ocr")
    enqueue_finalizer = false
    Ops::StructuredLogger.info(
      event: "ai.ocr_analysis.skipped_terminal",
      payload: {
        active_job_id: job_id,
        instagram_account_id: account.id,
        instagram_profile_id: context[:profile].id,
        instagram_profile_post_id: post.id,
        pipeline_run_id: pipeline_run_id
      }
    )
    return
  end

  # Resource guard may defer the job (it re-enqueues itself with an
  # incremented defer_attempt) — bail out here in that case.
  unless resource_available?(defer_attempt: defer_attempt, context: context, pipeline_run_id: pipeline_run_id)
    return
  end

  pipeline_state.mark_step_running!(
    run_id: pipeline_run_id,
    step: "ocr",
    queue_name: queue_name,
    active_job_id: job_id
  )

  # Prefer OCR output cached on the post by face recognition to avoid
  # re-running extraction.
  reused = reuse_ocr_from_face_metadata(post: post)
  result =
    if reused
      reused
    else
      context_builder = Ai::PostAnalysisContextBuilder.new(profile: context[:profile], post: post)
      image_payload = context_builder.detection_image_payload
      if ActiveModel::Type::Boolean.new.cast(image_payload[:skipped])
        # No usable image: record a skipped result instead of failing.
        {
          skipped: true,
          ocr_text: nil,
          ocr_blocks: [],
          metadata: {
            source: "post_ocr_service",
            reason: image_payload[:reason].to_s.presence || "image_payload_unavailable"
          }
        }
      else
        Timeout.timeout(ocr_timeout_seconds) do
          Ai::PostOcrService.new.extract_from_image_bytes(
            image_bytes: image_payload[:image_bytes],
            usage_context: {
              workflow: "post_analysis_pipeline",
              task: "ocr",
              post_id: post.id,
              instagram_account_id: account.id
            }
          )
        end
      end
    end

  persist_ocr_result!(post: post, result: result)

  pipeline_state.mark_step_completed!(
    run_id: pipeline_run_id,
    step: "ocr",
    status: "succeeded",
    result: {
      skipped: ActiveModel::Type::Boolean.new.cast(result[:skipped]),
      text_present: result[:ocr_text].to_s.present?,
      ocr_blocks_count: Array(result[:ocr_blocks]).length,
      # Result may be symbol- or string-keyed depending on its source.
      source: result.dig(:metadata, :source) || result.dig("metadata", "source")
    }.compact
  )
rescue StandardError => e
  # Best-effort failure record (context may be nil), then re-raise for the
  # class-level retry policies.
  context&.dig(:pipeline_state)&.mark_step_completed!(
    run_id: pipeline_run_id,
    step: "ocr",
    status: "failed",
    error: format_error(e),
    result: {
      reason: "ocr_analysis_failed"
    }
  )
  raise
ensure
  # Always advance the pipeline unless the step was already terminal.
  if context && enqueue_finalizer
    enqueue_pipeline_finalizer(
      account: context[:account],
      profile: context[:profile],
      post: context[:post],
      pipeline_run_id: pipeline_run_id
    )
  end
end
-
-
private
-
-
# Consults the shared resource guard before running OCR.
#
# Returns true when the step may proceed now. When the guard denies the task:
#   * after MAX_DEFER_ATTEMPTS deferrals, marks the step terminally failed; or
#   * otherwise flips the step back to "queued" and re-enqueues this same job
#     with a backoff and an incremented defer_attempt;
# and returns false so the caller can bail out of perform.
def resource_available?(defer_attempt:, context:, pipeline_run_id:)
  guard = Ops::ResourceGuard.allow_ai_task?(task: "ocr", queue_name: queue_name, critical: false)
  return true if ActiveModel::Type::Boolean.new.cast(guard[:allow])

  # Exhausted the deferral budget: record a terminal failure with the guard's
  # reason and resource snapshot for debugging, then stop retrying.
  if defer_attempt.to_i >= MAX_DEFER_ATTEMPTS
    context[:pipeline_state].mark_step_completed!(
      run_id: pipeline_run_id,
      step: "ocr",
      status: "failed",
      error: "resource_guard_exhausted: #{guard[:reason]}",
      result: {
        reason: "resource_constraints",
        snapshot: guard[:snapshot]
      }
    )
    return false
  end

  # Fall back to a 20s delay when the guard gives no usable retry hint.
  retry_seconds = guard[:retry_in_seconds].to_i
  retry_seconds = 20 if retry_seconds <= 0

  # Surface the wait (attempt count, backoff, resource snapshot) on the
  # pipeline state so observers can see why the step is parked.
  context[:pipeline_state].mark_step_queued!(
    run_id: pipeline_run_id,
    step: "ocr",
    queue_name: queue_name,
    active_job_id: job_id,
    result: {
      reason: "resource_constrained",
      defer_attempt: defer_attempt.to_i,
      retry_in_seconds: retry_seconds,
      snapshot: guard[:snapshot]
    }
  )

  # Re-enqueue the identical job after the backoff, counting the deferral.
  self.class.set(wait: retry_seconds.seconds).perform_later(
    instagram_account_id: context[:account].id,
    instagram_profile_id: context[:profile].id,
    instagram_profile_post_id: context[:post].id,
    pipeline_run_id: pipeline_run_id,
    defer_attempt: defer_attempt.to_i + 1
  )

  false
end
-
-
# Reuses OCR output previously cached by the face-recognition step, when the
# post's metadata carries it. Returns a result hash shaped like a fresh OCR
# run (source tagged "face_recognition_cache"), or nil when nothing usable
# is cached.
def reuse_ocr_from_face_metadata(post:)
  cached = post.metadata.is_a?(Hash) ? post.metadata.dig("face_recognition") : nil
  return nil unless cached.is_a?(Hash)

  stripped_text = cached["ocr_text"].to_s.strip
  hash_blocks = Array(cached["ocr_blocks"]).select { |entry| entry.is_a?(Hash) }

  # Nothing cached worth reusing — force a real OCR pass.
  return nil if stripped_text.blank? && hash_blocks.empty?

  {
    skipped: false,
    ocr_text: stripped_text.presence,
    ocr_blocks: hash_blocks.first(80),
    metadata: { source: "face_recognition_cache" }
  }
end
-
-
# Persists an OCR result onto the post under a row lock.
#
# Merges ocr_text / ocr_blocks into the `analysis` hash and writes a compact
# audit record into `metadata["ocr_analysis"]`. Reloads inside the lock so a
# concurrent writer's changes are not clobbered.
def persist_ocr_result!(post:, result:)
  post.with_lock do
    post.reload
    analysis = post.analysis.is_a?(Hash) ? post.analysis.deep_dup : {}
    metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}

    # Only touch keys the OCR run actually produced.
    analysis["ocr_text"] = result[:ocr_text] if result.key?(:ocr_text)
    analysis["ocr_blocks"] = Array(result[:ocr_blocks]).first(40) if result.key?(:ocr_blocks)

    # Metadata keeps a larger block sample (80 vs 40) plus provenance; the
    # dig fallbacks accept both symbol- and string-keyed result hashes.
    metadata["ocr_analysis"] = {
      "ocr_text" => result[:ocr_text].to_s.presence,
      "ocr_blocks" => Array(result[:ocr_blocks]).first(80),
      "source" => result.dig(:metadata, :source) || result.dig("metadata", "source"),
      "reason" => result.dig(:metadata, :reason) || result.dig("metadata", "reason"),
      "error_message" => result.dig(:metadata, :error_message) || result.dig("metadata", "error_message"),
      "updated_at" => Time.current.iso8601(3)
    }.compact

    post.update!(analysis: analysis, metadata: metadata)
  end
end
-
-
# Wall-clock budget (seconds) for one OCR extraction.
# Configurable via AI_OCR_TIMEOUT_SECONDS; always clamped to 15..360.
def ocr_timeout_seconds
  configured = ENV.fetch("AI_OCR_TIMEOUT_SECONDS", 150)
  configured.to_i.clamp(15, 360)
end
-
end
-
require "timeout"
-
-
# Pipeline step "video": extracts context (processing mode, transcript,
# topics, OCR, scenes, …) from a post's video media and records progress on
# the shared pipeline state. Always tries to enqueue the pipeline finalizer
# on the way out unless the step was already terminal.
class ProcessPostVideoAnalysisJob < PostAnalysisPipelineJob
  queue_as :video_processing_queue

  # Upper bound on resource-guard deferrals before the step is marked failed.
  MAX_DEFER_ATTEMPTS = ENV.fetch("AI_VIDEO_MAX_DEFER_ATTEMPTS", 4).to_i.clamp(1, 12)

  retry_on Timeout::Error, wait: :polynomially_longer, attempts: 2

  # Runs the video-analysis step for one post within one pipeline run.
  # defer_attempt counts resource-guard re-enqueues (see resource_available?).
  def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, pipeline_run_id:, defer_attempt: 0)
    enqueue_finalizer = true
    context = load_pipeline_context!(
      instagram_account_id: instagram_account_id,
      instagram_profile_id: instagram_profile_id,
      instagram_profile_post_id: instagram_profile_post_id,
      pipeline_run_id: pipeline_run_id
    )
    return unless context

    pipeline_state = context[:pipeline_state]
    # Step or whole pipeline already terminal: log and bail without enqueueing
    # the finalizer again.
    if pipeline_state.pipeline_terminal?(run_id: pipeline_run_id) || pipeline_state.step_terminal?(run_id: pipeline_run_id, step: "video")
      enqueue_finalizer = false
      Ops::StructuredLogger.info(
        event: "ai.video_analysis.skipped_terminal",
        payload: {
          active_job_id: job_id,
          instagram_account_id: context[:account].id,
          instagram_profile_id: context[:profile].id,
          instagram_profile_post_id: context[:post].id,
          pipeline_run_id: pipeline_run_id
        }
      )
      return
    end

    # Resource guard may have deferred (re-enqueued) or failed the step.
    unless resource_available?(defer_attempt: defer_attempt, context: context, pipeline_run_id: pipeline_run_id)
      return
    end

    profile = context[:profile]
    post = context[:post]

    pipeline_state.mark_step_running!(
      run_id: pipeline_run_id,
      step: "video",
      queue_name: queue_name,
      active_job_id: job_id
    )

    builder = Ai::PostAnalysisContextBuilder.new(profile: profile, post: post)
    payload = builder.video_payload

    # Builder signalled there is no processable video: persist the skip
    # marker and complete the step successfully.
    if ActiveModel::Type::Boolean.new.cast(payload[:skipped])
      persist_video_analysis!(post: post, result: payload)
      pipeline_state.mark_step_completed!(
        run_id: pipeline_run_id,
        step: "video",
        status: "succeeded",
        result: {
          skipped: true,
          reason: payload[:reason].to_s
        }
      )
      return
    end

    # Bounded extraction; Timeout::Error is retried via retry_on above.
    result = Timeout.timeout(video_timeout_seconds) do
      PostVideoContextExtractionService.new.extract(
        video_bytes: payload[:video_bytes],
        reference_id: payload[:reference_id].to_s.presence || "post_media_#{post.id}",
        content_type: payload[:content_type]
      )
    end

    persist_video_analysis!(post: post, result: result)

    # Compact, boolean/count-only summary on the step record.
    pipeline_state.mark_step_completed!(
      run_id: pipeline_run_id,
      step: "video",
      status: "succeeded",
      result: {
        skipped: ActiveModel::Type::Boolean.new.cast(result[:skipped]),
        processing_mode: result[:processing_mode].to_s,
        static: ActiveModel::Type::Boolean.new.cast(result[:static]),
        semantic_route: result[:semantic_route].to_s.presence,
        duration_seconds: result[:duration_seconds],
        has_audio: ActiveModel::Type::Boolean.new.cast(result[:has_audio]),
        transcript_present: result[:transcript].to_s.present?,
        topics_count: Array(result[:topics]).length
      }
    )
  rescue StandardError => e
    # context may be nil if load_pipeline_context! itself raised.
    context&.dig(:pipeline_state)&.mark_step_completed!(
      run_id: pipeline_run_id,
      step: "video",
      status: "failed",
      error: format_error(e),
      result: {
        reason: "video_analysis_failed"
      }
    )
    raise
  ensure
    # The finalizer decides whether the whole pipeline is done; enqueue it on
    # every exit path except the already-terminal short-circuit above.
    if context && enqueue_finalizer
      enqueue_pipeline_finalizer(
        account: context[:account],
        profile: context[:profile],
        post: context[:post],
        pipeline_run_id: pipeline_run_id
      )
    end
  end

  private

  # Same deferral protocol as the other pipeline steps: true = proceed now;
  # false = the step was either failed (deferral budget spent) or re-queued
  # with a backoff.
  def resource_available?(defer_attempt:, context:, pipeline_run_id:)
    guard = Ops::ResourceGuard.allow_ai_task?(task: "video", queue_name: queue_name, critical: false)
    return true if ActiveModel::Type::Boolean.new.cast(guard[:allow])

    if defer_attempt.to_i >= MAX_DEFER_ATTEMPTS
      context[:pipeline_state].mark_step_completed!(
        run_id: pipeline_run_id,
        step: "video",
        status: "failed",
        error: "resource_guard_exhausted: #{guard[:reason]}",
        result: {
          reason: "resource_constraints",
          snapshot: guard[:snapshot]
        }
      )
      return false
    end

    # Default backoff when the guard offers no retry hint.
    retry_seconds = guard[:retry_in_seconds].to_i
    retry_seconds = 20 if retry_seconds <= 0

    context[:pipeline_state].mark_step_queued!(
      run_id: pipeline_run_id,
      step: "video",
      queue_name: queue_name,
      active_job_id: job_id,
      result: {
        reason: "resource_constrained",
        defer_attempt: defer_attempt.to_i,
        retry_in_seconds: retry_seconds,
        snapshot: guard[:snapshot]
      }
    )

    self.class.set(wait: retry_seconds.seconds).perform_later(
      instagram_account_id: context[:account].id,
      instagram_profile_id: context[:profile].id,
      instagram_profile_post_id: context[:post].id,
      pipeline_run_id: pipeline_run_id,
      defer_attempt: defer_attempt.to_i + 1
    )

    false
  end

  # Writes the (normalized) video result onto the post under a row lock:
  # video_* keys plus merged shared keys in `analysis`, and a full snapshot
  # in `metadata["video_processing"]`.
  def persist_video_analysis!(post:, result:)
    normalized = normalize_video_result(result)
    post.with_lock do
      post.reload
      analysis = post.analysis.is_a?(Hash) ? post.analysis.deep_dup : {}
      metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}

      # Video-specific keys: only written when the result provided them.
      analysis["video_processing_mode"] = normalized[:processing_mode].to_s if normalized.key?(:processing_mode)
      analysis["video_static_detected"] = ActiveModel::Type::Boolean.new.cast(normalized[:static]) if normalized.key?(:static)
      analysis["video_semantic_route"] = normalized[:semantic_route].to_s if normalized[:semantic_route].to_s.present?
      analysis["video_duration_seconds"] = normalized[:duration_seconds] if normalized.key?(:duration_seconds)
      analysis["video_context_summary"] = normalized[:context_summary].to_s if normalized[:context_summary].to_s.present?
      analysis["transcript"] = normalized[:transcript].to_s if normalized[:transcript].to_s.present?
      analysis["video_topics"] = normalized[:topics] if normalized[:topics].is_a?(Array)
      analysis["video_objects"] = normalized[:objects] if normalized[:objects].is_a?(Array)
      analysis["video_scenes"] = normalized[:scenes] if normalized[:scenes].is_a?(Array)
      analysis["video_hashtags"] = normalized[:hashtags] if normalized[:hashtags].is_a?(Array)
      analysis["video_mentions"] = normalized[:mentions] if normalized[:mentions].is_a?(Array)
      analysis["video_profile_handles"] = normalized[:profile_handles] if normalized[:profile_handles].is_a?(Array)
      analysis["video_ocr_text"] = normalized[:ocr_text].to_s if normalized[:ocr_text].to_s.present?
      analysis["video_ocr_blocks"] = normalized[:ocr_blocks] if normalized[:ocr_blocks].is_a?(Array)

      # Shared keys: union with whatever other steps already wrote.
      analysis["topics"] = merge_strings(analysis["topics"], normalized[:topics], limit: 40)
      analysis["objects"] = merge_strings(analysis["objects"], normalized[:objects], limit: 50)
      analysis["hashtags"] = merge_strings(analysis["hashtags"], normalized[:hashtags], limit: 50)
      analysis["mentions"] = merge_strings(analysis["mentions"], normalized[:mentions], limit: 50)

      # OCR from video only fills the shared slots when nothing is there yet
      # (the dedicated OCR step takes precedence).
      if analysis["ocr_text"].to_s.blank? && normalized[:ocr_text].to_s.present?
        analysis["ocr_text"] = normalized[:ocr_text].to_s
      end
      if Array(analysis["ocr_blocks"]).empty? && normalized[:ocr_blocks].is_a?(Array)
        analysis["ocr_blocks"] = normalized[:ocr_blocks].first(40)
      end

      metadata["video_processing"] = {
        "skipped" => ActiveModel::Type::Boolean.new.cast(normalized[:skipped]),
        "processing_mode" => normalized[:processing_mode].to_s,
        "static" => ActiveModel::Type::Boolean.new.cast(normalized[:static]),
        "semantic_route" => normalized[:semantic_route].to_s.presence,
        "duration_seconds" => normalized[:duration_seconds],
        "has_audio" => ActiveModel::Type::Boolean.new.cast(normalized[:has_audio]),
        "transcript" => normalized[:transcript].to_s.presence,
        "topics" => normalized[:topics],
        "objects" => normalized[:objects],
        "scenes" => normalized[:scenes],
        "hashtags" => normalized[:hashtags],
        "mentions" => normalized[:mentions],
        "profile_handles" => normalized[:profile_handles],
        "ocr_text" => normalized[:ocr_text].to_s.presence,
        "ocr_blocks" => normalized[:ocr_blocks],
        "context_summary" => normalized[:context_summary].to_s.presence,
        "metadata" => normalized[:metadata],
        "updated_at" => Time.current.iso8601(3)
      }.compact

      post.update!(analysis: analysis, metadata: metadata)
    end
  end

  # Coerces a raw extraction result (symbol- or string-keyed hash, or any
  # non-hash junk) into a predictable symbol-keyed shape with bounded lists.
  def normalize_video_result(result)
    row = result.is_a?(Hash) ? result : {}
    {
      skipped: value_for(row, :skipped),
      processing_mode: value_for(row, :processing_mode).to_s.presence || "dynamic_video",
      static: value_for(row, :static),
      semantic_route: value_for(row, :semantic_route),
      duration_seconds: value_for(row, :duration_seconds),
      has_audio: value_for(row, :has_audio),
      transcript: value_for(row, :transcript),
      topics: normalized_strings(value_for(row, :topics), limit: 40),
      objects: normalized_strings(value_for(row, :objects), limit: 50),
      scenes: Array(value_for(row, :scenes)).select { |value| value.is_a?(Hash) }.first(50),
      hashtags: normalized_strings(value_for(row, :hashtags), limit: 50),
      mentions: normalized_strings(value_for(row, :mentions), limit: 50),
      profile_handles: normalized_strings(value_for(row, :profile_handles), limit: 50),
      ocr_text: value_for(row, :ocr_text),
      ocr_blocks: Array(value_for(row, :ocr_blocks)).select { |value| value.is_a?(Hash) }.first(80),
      context_summary: value_for(row, :context_summary),
      metadata: row[:metadata] || row["metadata"] || { reason: row[:reason] || row["reason"] }
    }
  end

  # Stripped, de-duplicated, non-blank strings, capped at `limit`.
  def normalized_strings(values, limit:)
    Array(values).map(&:to_s).map(&:strip).reject(&:blank?).uniq.first(limit)
  end

  # Union of two string lists under the same normalization rules.
  def merge_strings(existing, incoming, limit:)
    normalized_strings(Array(existing) + Array(incoming), limit: limit)
  end

  # Indifferent (symbol-then-string) hash access; nil when absent.
  def value_for(row, key)
    return row[key] if row.key?(key)
    return row[key.to_s] if row.key?(key.to_s)

    nil
  end

  # Wall-clock budget for one extraction, clamped to 20..420 seconds.
  def video_timeout_seconds
    ENV.fetch("AI_VIDEO_TIMEOUT_SECONDS", 180).to_i.clamp(20, 420)
  end
end
-
require "timeout"
-
-
# Pipeline step "visual": runs the visual-only AI analysis for a post via
# Ai::Runner and records progress on the shared pipeline state. Transient
# network/timeout errors are re-raised so ActiveJob's retry_on handles them;
# all other failures are recorded and swallowed (the step ends "failed").
class ProcessPostVisualAnalysisJob < PostAnalysisPipelineJob
  queue_as :ai_visual_queue

  # Hard cap on total attempts recorded on the step before giving up.
  MAX_VISUAL_ATTEMPTS = ENV.fetch("AI_VISUAL_MAX_ATTEMPTS", 6).to_i.clamp(1, 20)

  retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 3
  retry_on Errno::ECONNRESET, Errno::ECONNREFUSED, wait: :polynomially_longer, attempts: 3
  retry_on Timeout::Error, wait: :polynomially_longer, attempts: 2

  def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, pipeline_run_id:)
    enqueue_finalizer = true
    context = load_pipeline_context!(
      instagram_account_id: instagram_account_id,
      instagram_profile_id: instagram_profile_id,
      instagram_profile_post_id: instagram_profile_post_id,
      pipeline_run_id: pipeline_run_id
    )
    return unless context

    account = context[:account]
    profile = context[:profile]
    post = context[:post]
    pipeline_state = context[:pipeline_state]
    # Monotonic clock for the duration_ms metric; inline rescue keeps the job
    # alive on platforms without CLOCK_MONOTONIC (then duration is omitted).
    started_monotonic = Process.clock_gettime(Process::CLOCK_MONOTONIC) rescue nil

    # Already terminal: log and bail without re-enqueueing the finalizer.
    if pipeline_state.pipeline_terminal?(run_id: pipeline_run_id) || pipeline_state.step_terminal?(run_id: pipeline_run_id, step: "visual")
      enqueue_finalizer = false
      Ops::StructuredLogger.info(
        event: "ai.visual_analysis.skipped_terminal",
        payload: {
          active_job_id: job_id,
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          instagram_profile_post_id: post.id,
          pipeline_run_id: pipeline_run_id
        }
      )
      return
    end

    # Attempt budget spent: fail the step terminally and stop.
    if visual_attempts_exhausted?(pipeline_state: pipeline_state, pipeline_run_id: pipeline_run_id)
      pipeline_state.mark_step_completed!(
        run_id: pipeline_run_id,
        step: "visual",
        status: "failed",
        error: "visual_attempts_exhausted",
        result: {
          reason: "visual_attempts_exhausted",
          max_attempts: MAX_VISUAL_ATTEMPTS
        }
      )

      Ops::StructuredLogger.warn(
        event: "ai.visual_analysis.exhausted",
        payload: {
          active_job_id: job_id,
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          instagram_profile_post_id: post.id,
          pipeline_run_id: pipeline_run_id,
          max_attempts: MAX_VISUAL_ATTEMPTS
        }
      )
      return
    end

    pipeline_state.mark_step_running!(
      run_id: pipeline_run_id,
      step: "visual",
      queue_name: queue_name,
      active_job_id: job_id
    )

    builder = Ai::PostAnalysisContextBuilder.new(profile: profile, post: post)
    payload = builder.payload
    media = builder.media_payload
    fingerprint = builder.media_fingerprint(media: media)
    media_summary = media_context(media: media)

    # Missing media is only a warning: the analysis still runs text-only.
    if media_summary[:media_type] == "none"
      Ops::StructuredLogger.warn(
        event: "ai.visual_analysis.media_skipped",
        payload: {
          active_job_id: job_id,
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          instagram_profile_post_id: post.id,
          pipeline_run_id: pipeline_run_id,
          reason: media_summary[:reason],
          media_content_type: media_summary[:media_content_type]
        }
      )
    end

    # Bounded provider call; visual-only options disable faces/OCR/comments.
    run = Timeout.timeout(visual_timeout_seconds) do
      Ai::Runner.new(account: account).analyze!(
        purpose: "post",
        analyzable: post,
        payload: payload,
        media: media,
        media_fingerprint: fingerprint,
        provider_options: {
          visual_only: true,
          include_faces: false,
          include_ocr: false,
          include_comment_generation: false
        }
      )
    end

    duration_ms =
      if started_monotonic
        ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_monotonic) * 1000).round
      end

    # ai_status stays "running": presumably the pipeline finalizer moves it
    # to a terminal status once all steps are done — confirm against it.
    post.update!(
      ai_provider: run[:provider].key,
      ai_model: run.dig(:result, :model),
      analysis: run.dig(:result, :analysis),
      ai_status: "running"
    )

    pipeline_state.mark_step_completed!(
      run_id: pipeline_run_id,
      step: "visual",
      status: "succeeded",
      result: {
        provider: run[:provider].key,
        model: run.dig(:result, :model),
        ai_analysis_id: run[:record]&.id,
        cache_hit: ActiveModel::Type::Boolean.new.cast(run[:cached]),
        media_type: media_summary[:media_type],
        media_content_type: media_summary[:media_content_type],
        media_source: media_summary[:media_source],
        media_byte_size: media_summary[:media_byte_size],
        duration_ms: duration_ms
      }
    )
  rescue StandardError => e
    # context may be nil if load_pipeline_context! itself raised.
    context&.dig(:pipeline_state)&.mark_step_completed!(
      run_id: pipeline_run_id,
      step: "visual",
      status: "failed",
      error: format_error(e),
      result: {
        reason: "visual_analysis_failed"
      }
    )

    Ops::StructuredLogger.warn(
      event: "ai.visual_analysis.failed",
      payload: {
        active_job_id: job_id,
        instagram_account_id: context&.dig(:account)&.id || instagram_account_id,
        instagram_profile_id: context&.dig(:profile)&.id || instagram_profile_id,
        instagram_profile_post_id: context&.dig(:post)&.id || instagram_profile_post_id,
        pipeline_run_id: pipeline_run_id,
        error_class: e.class.name,
        error_message: e.message.to_s.byteslice(0, 280),
        retryable: retryable_visual_error?(e)
      }
    )

    # Only transient network/timeout errors bubble up (to hit retry_on);
    # everything else is swallowed after being recorded above.
    raise if retryable_visual_error?(e)
  ensure
    if context && enqueue_finalizer
      enqueue_pipeline_finalizer(
        account: context[:account],
        profile: context[:profile],
        post: context[:post],
        pipeline_run_id: pipeline_run_id
      )
    end
  end

  private

  # True once the step's recorded attempt counter reaches the cap.
  def visual_attempts_exhausted?(pipeline_state:, pipeline_run_id:)
    attempts = pipeline_state.step_state(run_id: pipeline_run_id, step: "visual").to_h["attempts"].to_i
    attempts >= MAX_VISUAL_ATTEMPTS
  end

  # Mirrors the retry_on declarations: timeouts and connection resets/refusals.
  def retryable_visual_error?(error)
    return true if error.is_a?(Timeout::Error)
    return true if error.is_a?(Net::OpenTimeout) || error.is_a?(Net::ReadTimeout)
    return true if error.is_a?(Errno::ECONNRESET) || error.is_a?(Errno::ECONNREFUSED)

    false
  end

  # Flattens the builder's media payload into loggable scalars;
  # media_type "none" marks an absent/unusable attachment.
  def media_context(media:)
    payload = media.is_a?(Hash) ? media : {}
    bytes = payload[:bytes]
    byte_size = bytes.respond_to?(:bytesize) ? bytes.bytesize : nil

    {
      media_type: payload[:type].to_s.presence || "none",
      media_content_type: payload[:content_type].to_s.presence,
      media_source: payload[:source].to_s.presence,
      media_byte_size: byte_size,
      reason: payload[:reason].to_s.presence
    }
  end

  # Wall-clock budget for one provider call, clamped to 30..600 seconds.
  def visual_timeout_seconds
    ENV.fetch("AI_VISUAL_TIMEOUT_SECONDS", 210).to_i.clamp(30, 600)
  end
end
-
# Purges attached media for posts whose purge_at deadline has passed, oldest
# deadline first, clearing purge_at so rows are not reprocessed.
class PurgeExpiredInstagramPostMediaJob < ApplicationJob
  queue_as :post_downloads

  # Accepts either a legacy positional options hash or keyword arguments.
  # Recognized key: :limit (default 200, clamped to 1..2000).
  def perform(opts = nil, **kwargs)
    params = normalize_params(opts, kwargs, limit: 200)
    now = Time.current
    scope = InstagramPost
              .where("purge_at IS NOT NULL AND purge_at <= ?", now)
              .order(purge_at: :asc)
              .limit(params[:limit].to_i.clamp(1, 2000))

    # FIX: the previous find_each batched by primary key, which silently
    # discards the purge_at ordering (and, on older Rails, the limit).
    # The relation is already capped at <= 2000 rows, so plain #each honors
    # both the order and the limit without excessive memory use.
    scope.each do |post|
      begin
        post.media.purge if post.media.attached?
      rescue StandardError
        nil # best-effort: one failed purge must not block the rest of the batch
      end
      post.update_columns(purge_at: nil) # clear deadline to avoid reprocessing
    end
  end

  private

  # Merge precedence: defaults < positional options hash < keyword args.
  def normalize_params(opts, kwargs, defaults)
    from_opts = opts.is_a?(Hash) ? opts.symbolize_keys : {}
    defaults.merge(from_opts).merge(kwargs.symbolize_keys)
  end
end
-
1
# Rebuilds and broadcasts the audit-log section for one account, with a
# per-account throttle so bursts of triggers collapse into one refresh.
class RefreshAccountAuditLogsJob < ApplicationJob
  queue_as :maintenance

  # Minimum gap between enqueues for the same account.
  THROTTLE_SECONDS = 2.0
  # TTL on the throttle marker; bounds staleness if a write is never repeated.
  THROTTLE_EXPIRY = 30.seconds

  # Throttled enqueue helper. Skips when the account id is invalid or an
  # enqueue for this account happened within THROTTLE_SECONDS.
  def self.enqueue_for(instagram_account_id:, limit: 120)
    account_id = instagram_account_id.to_i
    return if account_id <= 0

    now = Time.current.to_f
    key = throttle_key(account_id)
    last_enqueued = Rails.cache.read(key).to_f
    return if last_enqueued.positive? && (now - last_enqueued) < THROTTLE_SECONDS

    Rails.cache.write(key, now, expires_in: THROTTLE_EXPIRY)
    perform_later(instagram_account_id: account_id, limit: limit)
  rescue StandardError
    # Best-effort fallback: if the cache (or the throttled enqueue) blew up,
    # enqueue anyway without throttling rather than drop the refresh.
    perform_later(instagram_account_id: account_id, limit: limit)
  end

  # Builds the recent audit entries and replaces the section over Turbo
  # Streams. Failures are logged and swallowed so the job never retries.
  def perform(instagram_account_id:, limit: 120)
    account = InstagramAccount.find_by(id: instagram_account_id)
    return unless account

    entries = Ops::AuditLogBuilder.for_account(instagram_account: account, limit: limit.to_i.clamp(20, 250))
    Turbo::StreamsChannel.broadcast_replace_to(
      account,
      target: "account_audit_logs_section",
      partial: "instagram_accounts/audit_logs_section",
      locals: { recent_audit_entries: entries }
    )
  rescue StandardError => e
    Rails.logger.warn("[RefreshAccountAuditLogsJob] failed for account_id=#{instagram_account_id}: #{e.class}: #{e.message}")
    nil
  end

  # Cache key carrying the last enqueue timestamp for an account.
  def self.throttle_key(account_id)
    "jobs:refresh_account_audit_logs:last_enqueued:#{account_id}"
  end
  private_class_method :throttle_key
end
-
require "timeout"
-
-
# Re-runs face recognition for a single profile post and keeps a small state
# machine (running/completed/failed) under metadata["history_build"]
# ["face_refresh"] so the history-build UI can show progress.
class RefreshProfilePostFaceIdentityJob < ApplicationJob
  queue_as :ai_face_queue

  retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 3
  retry_on Errno::ECONNRESET, Errno::ECONNREFUSED, wait: :polynomially_longer, attempts: 3
  retry_on Timeout::Error, wait: :polynomially_longer, attempts: 2

  def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, trigger_source: "profile_history_build")
    # Scoped lookups so a post can never be processed under the wrong account.
    account = InstagramAccount.find_by(id: instagram_account_id)
    return unless account

    profile = account.instagram_profiles.find_by(id: instagram_profile_id)
    return unless profile

    post = profile.instagram_profile_posts.find_by(id: instagram_profile_post_id)
    return unless post && post.media.attached?

    mark_face_refresh_state!(
      post: post,
      attributes: {
        "status" => "running",
        "started_at" => Time.current.iso8601(3),
        "trigger_source" => trigger_source.to_s.presence || "profile_history_build",
        "active_job_id" => job_id,
        "queue_name" => queue_name
      }
    )

    # Bounded run; Timeout::Error feeds the retry_on declaration above.
    result = Timeout.timeout(face_refresh_timeout_seconds) do
      PostFaceRecognitionService.new.process!(post: post)
    end

    mark_face_refresh_state!(
      post: post,
      attributes: {
        "status" => "completed",
        "finished_at" => Time.current.iso8601(3),
        "result" => {
          "skipped" => ActiveModel::Type::Boolean.new.cast(result[:skipped]),
          "reason" => result[:reason].to_s.presence,
          "face_count" => result[:face_count].to_i,
          "linked_face_count" => result[:linked_face_count].to_i,
          "low_confidence_filtered_count" => result[:low_confidence_filtered_count].to_i,
          "matched_people_count" => Array(result[:matched_people]).length
        }.compact
      }
    )
  rescue StandardError => e
    # Record the failure only when the lookups got far enough to have a post.
    if defined?(post) && post&.persisted?
      mark_face_refresh_state!(
        post: post,
        attributes: {
          "status" => "failed",
          "failed_at" => Time.current.iso8601(3),
          "error_class" => e.class.name,
          "error_message" => e.message.to_s.byteslice(0, 280)
        }
      )
    end
    raise
  end

  private

  # Wall-clock budget for one recognition run, clamped to 20..420 seconds.
  def face_refresh_timeout_seconds
    ENV.fetch("PROFILE_HISTORY_FACE_REFRESH_TIMEOUT_SECONDS", "180").to_i.clamp(20, 420)
  end

  # Deep-merges the given attributes into the face_refresh state under a row
  # lock. Best-effort by design: any persistence error is swallowed so state
  # bookkeeping can never fail the actual recognition work.
  def mark_face_refresh_state!(post:, attributes:)
    post.with_lock do
      metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
      history = metadata["history_build"].is_a?(Hash) ? metadata["history_build"].deep_dup : {}
      state = history["face_refresh"].is_a?(Hash) ? history["face_refresh"].deep_dup : {}
      state.merge!(attributes.to_h.compact)
      history["face_refresh"] = state
      history["updated_at"] = Time.current.iso8601(3)
      metadata["history_build"] = history
      post.update!(metadata: metadata)
    end
  rescue StandardError
    nil
  end
end
-
# Kicks off automatic retries for recorded background-job failures.
class RetryFailedBackgroundJobsJob < ApplicationJob
  queue_as :sync

  # Accepts a legacy positional options hash and/or keyword arguments.
  # Recognized keys: :limit, :max_attempts, :cooldown_minutes.
  def perform(opts = nil, **kwargs)
    settings = normalize_params(opts, kwargs, limit: 20, max_attempts: 3, cooldown_minutes: 10)
    cooldown_window = settings[:cooldown_minutes].to_i.clamp(1, 120).minutes

    Jobs::FailureRetry.enqueue_automatic_retries!(
      limit: settings[:limit],
      max_attempts: settings[:max_attempts],
      cooldown: cooldown_window
    )
  end

  private

  # Merge precedence: defaults < positional options hash < keyword args.
  def normalize_params(opts, kwargs, defaults)
    legacy = opts.is_a?(Hash) ? opts.symbolize_keys : {}
    defaults.merge(legacy).merge(kwargs.symbolize_keys)
  end
end
-
# Delivers a queued InstagramMessage through the Instagram client, updating
# the message row and broadcasting each status transition over Turbo Streams.
class SendInstagramMessageJob < ApplicationJob
  queue_as :messages

  def perform(instagram_account_id:, instagram_message_id:)
    message = InstagramMessage.find(instagram_message_id)
    account = InstagramAccount.find(instagram_account_id)

    raise "Message/account mismatch" unless message.instagram_account_id == account.id

    transition!(message: message, account: account, status: "queued", error_message: nil)

    Instagram::Client.new(account: account).send_message_to_user!(
      username: message.instagram_profile.username,
      message_text: message.body
    )

    transition!(message: message, account: account, status: "sent", sent_at: Time.current)
  rescue StandardError => e
    # The lookups may have raised before the locals were set; re-fetch
    # leniently so the failure can still be recorded and broadcast.
    account ||= InstagramAccount.where(id: instagram_account_id).first
    message ||= InstagramMessage.where(id: instagram_message_id).first

    message&.update!(status: "failed", error_message: e.message)
    broadcast_message(account: account, message: message) if account && message
    raise
  end

  private

  # Persist a status change and immediately broadcast the refreshed row.
  def transition!(message:, account:, **attrs)
    message.update!(**attrs)
    broadcast_message(account: account, message: message)
  end

  # Replace the message's row in every subscribed view.
  def broadcast_message(account:, message:)
    Turbo::StreamsChannel.broadcast_replace_to(
      account,
      target: ActionView::RecordIdentifier.dom_id(message),
      partial: "instagram_messages/row",
      locals: { message: message }
    )
  end
end
-
# Processes a downloaded Instagram story through StoryProcessingService.
class StoryProcessingJob < ApplicationJob
  queue_as :frame_generation

  # @param instagram_story_id [Integer] story row to process
  # @param force [Boolean] when true, reprocess even if already handled
  def perform(instagram_story_id:, force: false)
    story = InstagramStory.find_by(id: instagram_story_id)
    return log_missing_story(instagram_story_id, force) unless story

    StoryProcessingService.new(story: story, force: force).process!
  end

  private

  # Leave a structured breadcrumb when the story row has vanished.
  def log_missing_story(instagram_story_id, force)
    Ops::StructuredLogger.info(
      event: "story_processing.skipped_missing_story",
      payload: { instagram_story_id: instagram_story_id, force: force }
    )
    nil
  end
end
-
# Repeatedly drains the home story carousel in batches until it is depleted,
# no new work appears for two consecutive cycles, or MAX_CYCLES is hit; then
# broadcasts a summary notification.
class SyncAllHomeStoriesJob < ApplicationJob
  queue_as :story_downloads

  # Safety valve against an endlessly replenishing carousel.
  MAX_CYCLES = 30

  def perform(instagram_account_id:, cycle_story_limit: SyncHomeStoryCarouselJob::STORY_BATCH_LIMIT)
    account = InstagramAccount.find(instagram_account_id)
    batch_limit = cycle_story_limit.to_i.clamp(1, SyncHomeStoryCarouselJob::STORY_BATCH_LIMIT)

    totals = Hash.new(0)
    cycles = 0
    idle_cycles = 0
    stop_reason = "max_cycles_reached"

    MAX_CYCLES.times do
      cycles += 1
      result = Instagram::Client.new(account: account).sync_home_story_carousel!(story_limit: batch_limit, auto_reply_only: false)
      merge_totals!(totals, result)

      # "Work" = anything actually produced this cycle; consecutive idle
      # cycles indicate the carousel is only re-serving seen stories.
      moved_work = result[:downloaded].to_i + result[:commented].to_i + result[:analyzed].to_i
      idle_cycles = moved_work.zero? ? idle_cycles + 1 : 0

      # Fewer stories than requested means the carousel ran dry.
      if result[:stories_visited].to_i < batch_limit
        stop_reason = "depleted_before_batch_limit"
        break
      end

      if idle_cycles >= 2
        stop_reason = "no_new_work_for_two_cycles"
        break
      end
    end

    message = "Continuous story sync done: cycles=#{cycles}, reason=#{stop_reason}, visited=#{totals[:stories_visited]}, downloaded=#{totals[:downloaded]}, analyzed=#{totals[:analyzed]}, commented=#{totals[:commented]}, reacted=#{totals[:reacted]}, skipped_ads=#{totals[:skipped_ads]}, skipped_unreplyable=#{totals[:skipped_unreplyable]}, skipped_interaction_retry=#{totals[:skipped_interaction_retry]}, skipped_reshared_external_link=#{totals[:skipped_reshared_external_link]}, failed=#{totals[:failed]}."
    kind = totals[:failed].to_i.positive? ? "alert" : "notice"

    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: kind, message: message }
    )
  rescue StandardError => e
    # account is nil if the initial find raised; re-fetch leniently so the
    # failure can still be surfaced to the UI before re-raising.
    account ||= InstagramAccount.where(id: instagram_account_id).first
    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "alert", message: "Continuous story sync failed: #{e.message}" }
    ) if account
    raise
  end

  private

  # Accumulates one cycle's counters into the running totals.
  def merge_totals!(totals, result)
    %i[
      stories_visited downloaded analyzed commented reacted skipped_video skipped_not_tagged
      skipped_ads skipped_invalid_media skipped_unreplyable skipped_interaction_retry skipped_reshared_external_link skipped_out_of_network failed
    ].each do |key|
      totals[key] += result[key].to_i
    end
  end
end
-
# Runs a follow-graph sync for one account and mirrors run status plus
# user-facing notifications over Turbo Streams.
class SyncFollowGraphJob < ApplicationJob
  queue_as :sync

  retry_on Selenium::WebDriver::Error::StaleElementReferenceError, wait: 3.seconds, attempts: 3

  def perform(instagram_account_id:, sync_run_id:)
    account = InstagramAccount.find(instagram_account_id)
    sync_run = account.sync_runs.find(sync_run_id)

    sync_run.update!(status: "running", started_at: Time.current, error_message: nil)
    broadcast_status(account: account, sync_run: sync_run)

    stats = Instagram::Client.new(account: account).sync_follow_graph!

    sync_run.update!(status: "succeeded", finished_at: Time.current, stats: stats)
    broadcast_status(account: account, sync_run: sync_run)
    broadcast_flash(account: account, kind: "notice", message: "Follow graph sync complete: #{stats[:profiles_total]} profiles (mutuals: #{stats[:mutuals]}).")
  rescue StandardError => e
    # The initial finds may have raised; re-fetch leniently so the failure
    # can still be persisted and surfaced before re-raising.
    account ||= InstagramAccount.where(id: instagram_account_id).first
    sync_run ||= account&.sync_runs&.where(id: sync_run_id)&.first

    sync_run&.update!(status: "failed", finished_at: Time.current, error_message: e.message)
    broadcast_status(account: account, sync_run: sync_run) if account && sync_run
    broadcast_flash(account: account, kind: "alert", message: "Follow graph sync failed: #{e.message}") if account
    raise
  end

  private

  # Replace the sync-status widget with the run's current state.
  def broadcast_status(account:, sync_run:)
    Turbo::StreamsChannel.broadcast_replace_to(
      account,
      target: "sync_status",
      partial: "sync_runs/status",
      locals: { sync_run: sync_run }
    )
  end

  # Append a notice/alert notification for the account's subscribers.
  def broadcast_flash(account:, kind:, message:)
    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: kind, message: message }
    )
  end
end
-
# Syncs the logged-in account's home story carousel: visits up to
# STORY_BATCH_LIMIT stories, downloads/analyzes them, and posts a Turbo
# notification summarizing the run. Transient network (and, when Selenium is
# loaded, WebDriver timeout) errors are retried with backoff.
class SyncHomeStoryCarouselJob < ApplicationJob
  queue_as :story_downloads

  # Hard cap on how many stories a single run may visit.
  STORY_BATCH_LIMIT = 10

  retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 3
  retry_on Errno::ECONNRESET, Errno::ECONNREFUSED, wait: :polynomially_longer, attempts: 3
  # Selenium is not loaded in every environment; only register the retry when
  # the error class actually resolves.
  selenium_timeout_error = "Selenium::WebDriver::Error::TimeoutError".safe_constantize
  retry_on selenium_timeout_error, wait: :polynomially_longer, attempts: 2 if selenium_timeout_error

  # @param instagram_account_id [Integer] account whose home carousel is synced
  # @param story_limit [Integer] requested story count, clamped to 1..STORY_BATCH_LIMIT
  # @param auto_reply_only [Object] boolean-ish; restrict sync to auto-reply-tagged profiles
  def perform(instagram_account_id:, story_limit: STORY_BATCH_LIMIT, auto_reply_only: false)
    account = InstagramAccount.find(instagram_account_id)
    limit = story_limit.to_i.clamp(1, STORY_BATCH_LIMIT)
    tagged_only = ActiveModel::Type::Boolean.new.cast(auto_reply_only)

    result = Instagram::Client.new(account: account).sync_home_story_carousel!(
      story_limit: limit,
      auto_reply_only: tagged_only
    )

    # A run that visited nothing, or had any per-story failure, is surfaced
    # as an alert rather than a notice.
    has_failure = result[:stories_visited].to_i.zero? || result[:failed].to_i.positive?
    notify(account, kind: has_failure ? "alert" : "notice", message: summary_message(result, failure: has_failure))
  rescue StandardError => e
    account ||= InstagramAccount.where(id: instagram_account_id).first
    notify(account, kind: "alert", message: "Home story sync failed: #{e.message}") if account
    raise
  end

  private

  # Builds the one-line run summary. The "failed=" field is only present in
  # the error variant. (Previously the two messages were duplicated inline
  # as two near-identical 13-field strings — a maintenance hazard.)
  def summary_message(result, failure:)
    fields = [ "visited=#{result[:stories_visited]}" ]
    fields << "failed=#{result[:failed]}" if failure
    %i[downloaded analyzed commented reacted skipped_video skipped_ads
       skipped_invalid_media skipped_unreplyable skipped_interaction_retry
       skipped_reshared_external_link skipped_out_of_network].each do |key|
      fields << "#{key}=#{result[key]}"
    end

    prefix = failure ? "Home story sync finished with errors" : "Home story sync complete"
    "#{prefix}: #{fields.join(', ')}."
  end

  # Appends a notification partial to the account's Turbo stream.
  def notify(account, kind:, message:)
    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: kind, message: message }
    )
  end
end
-
require "base64"
-
require "net/http"
-
require "digest"
-
require "stringio"
-
-
# Downloads, archives, analyzes, and (optionally) auto-replies to the active
# stories of a single Instagram profile. The heavy lifting happens in
# #perform; the constants below bound how much media is handled per run.
class SyncInstagramProfileStoriesJob < ApplicationJob
  queue_as :story_downloads

  # Max image size embedded inline (base64 data URL) in the AI payload.
  MAX_INLINE_IMAGE_BYTES = 2 * 1024 * 1024
  # Max video size passed inline to the AI analyzer.
  MAX_INLINE_VIDEO_BYTES = 10 * 1024 * 1024
  # Default and upper bound for stories processed per run.
  MAX_STORIES = 10
  # Max size accepted for a downloaded story preview image.
  MAX_PREVIEW_IMAGE_BYTES = 3 * 1024 * 1024
# Syncs the active stories of one profile for one account.
#
# Flow: resolve records -> action log -> fetch story dataset -> snapshot the
# profile -> per-story pipeline (skip checks, archive media, preview image,
# ingestion, AI analysis, optional auto-reply) -> summary log/notification.
# Per-story errors are collected and skipped; a method-level failure marks
# the action log failed, notifies the account, and re-raises for retry.
#
# @param instagram_account_id [Integer] owning account id (raises if missing)
# @param instagram_profile_id [Integer] profile scoped to that account
# @param profile_action_log_id [Integer, nil] reuse an existing action log row
# @param max_stories [Object] int-ish; clamped to 1..10
# @param force_analyze_all [Object] boolean-ish; reprocess already-seen stories
# @param auto_reply [Object] boolean-ish; enable the auto-reply branch
# @param require_auto_reply_tag [Boolean] skip entirely unless profile is tagged
def perform(instagram_account_id:, instagram_profile_id:, profile_action_log_id: nil, max_stories: MAX_STORIES, force_analyze_all: false, auto_reply: false, require_auto_reply_tag: false)
  account = InstagramAccount.find(instagram_account_id)
  profile = account.instagram_profiles.find(instagram_profile_id)
  max_stories_i = max_stories.to_i.clamp(1, 10)
  force = ActiveModel::Type::Boolean.new.cast(force_analyze_all)
  auto_reply_enabled = ActiveModel::Type::Boolean.new.cast(auto_reply)
  action_log = find_or_create_action_log(
    account: account,
    profile: profile,
    action: auto_reply_enabled ? "auto_story_reply" : "sync_stories",
    profile_action_log_id: profile_action_log_id
  )
  tagged_for_auto_reply = automatic_reply_enabled?(profile)
  # Guard: callers may require the "automatic_reply" tag; a missing tag is a
  # successful no-op, not a failure.
  if require_auto_reply_tag && !tagged_for_auto_reply
    action_log.mark_succeeded!(log_text: "Skipped: automatic_reply tag not present", extra_metadata: { skipped: true, reason: "missing_automatic_reply_tag" })
    return
  end
  action_log.mark_running!(extra_metadata: {
    queue_name: queue_name,
    active_job_id: job_id,
    max_stories: max_stories_i,
    force_analyze_all: force,
    auto_reply: auto_reply_enabled
  })
  Ops::StructuredLogger.info(
    event: "profile_story_sync.started",
    payload: {
      active_job_id: job_id,
      instagram_account_id: account.id,
      instagram_profile_id: profile.id,
      profile_username: profile.username,
      max_stories: max_stories_i,
      force_analyze_all: force,
      auto_reply: auto_reply_enabled
    }
  )

  dataset = Instagram::Client.new(account: account).fetch_profile_story_dataset!(
    username: profile.username,
    stories_limit: max_stories_i
  )

  # Refresh the cached profile fields from the fetched dataset before
  # iterating stories.
  sync_profile_snapshot!(profile: profile, details: dataset[:profile] || {})

  stories = Array(dataset[:stories]).first(max_stories_i)
  downloaded_count = 0
  reused_download_count = 0
  analyzed_count = 0
  reply_queued_count = 0
  story_failures = []

  # Per-story pipeline; the block-level rescue below turns any story error
  # into a recorded failure and moves on to the next story.
  stories.each do |story|
    story_id = story[:story_id].to_s
    next if story_id.blank?

    # Capture HTML snapshot for debugging story skipping
    capture_story_html_snapshot(profile: profile, story: story, story_index: stories.find_index(story))

    # Branch 1: the API flagged this story as an external-profile reshare —
    # record a debug event and archive the media without processing it.
    if story[:api_should_skip]
      profile.record_event!(
        kind: "story_skipped_debug",
        external_id: "story_skipped_debug:#{story_id}:#{Time.current.utc.iso8601(6)}",
        occurred_at: Time.current,
        metadata: base_story_metadata(profile: profile, story: story).merge(
          skip_reason: story[:api_external_profile_reason].to_s.presence || "api_external_profile_indicator",
          skip_source: "api_story_item_attribution",
          skip_targets: Array(story[:api_external_profile_targets]),
          duplicate_download_prevented: latest_story_download_event(profile: profile, story_id: story_id).present?
        )
      )
      skipped_download = download_skipped_story!(
        account: account,
        profile: profile,
        story: story,
        skip_reason: story[:api_external_profile_reason].to_s.presence || "api_external_profile_indicator"
      )
      downloaded_count += 1 if skipped_download[:downloaded]
      reused_download_count += 1 if skipped_download[:reused]
      next
    end

    # Branch 2: already handled in an earlier run — skip unless forced.
    already_processed = already_processed_story?(profile: profile, story_id: story_id)
    if already_processed && !force
      profile.record_event!(
        kind: "story_skipped_debug",
        external_id: "story_skipped_debug:#{story_id}:#{Time.current.utc.iso8601(6)}",
        occurred_at: Time.current,
        metadata: base_story_metadata(profile: profile, story: story).merge(
          skip_reason: "already_processed",
          force_analyze_all: force,
          story_index: stories.find_index(story),
          total_stories: stories.size,
          duplicate_download_prevented: latest_story_download_event(profile: profile, story_id: story_id).present?
        )
      )
      next
    end

    # Main path: record upload + view events, then acquire the media bytes.
    upload_event = profile.record_event!(
      kind: "story_uploaded",
      external_id: "story_uploaded:#{story_id}",
      occurred_at: story[:taken_at],
      metadata: base_story_metadata(profile: profile, story: story)
    )

    viewed_at = Time.current
    profile.update!(last_story_seen_at: viewed_at)
    profile.recompute_last_active!
    profile.save!

    profile.record_event!(
      kind: "story_viewed",
      external_id: "story_viewed:#{story_id}:#{viewed_at.utc.iso8601(6)}",
      occurred_at: viewed_at,
      metadata: base_story_metadata(profile: profile, story: story).merge(viewed_at: viewed_at.iso8601)
    )

    media_url = story[:media_url].to_s.strip
    next if media_url.blank?

    # Prefer media already archived for this story (or cached elsewhere for
    # the profile) over re-downloading from the CDN.
    existing_download_event = latest_story_download_event(profile: profile, story_id: story_id)
    reused_media = load_existing_story_media(event: existing_download_event)
    reused_media ||= load_cached_story_media_for_profile(
      account: account,
      profile: profile,
      story: story
    )
    if reused_media
      bytes = reused_media[:bytes]
      content_type = reused_media[:content_type]
      filename = reused_media[:filename]
      downloaded_event = reused_media[:event]
      reused_download_count += 1
    else
      bytes, content_type, filename = download_story_media(url: media_url, user_agent: account.user_agent)
      downloaded_at = Time.current
      downloaded_event = profile.record_event!(
        kind: "story_downloaded",
        external_id: "story_downloaded:#{story_id}:#{downloaded_at.utc.iso8601(6)}",
        occurred_at: downloaded_at,
        metadata: base_story_metadata(profile: profile, story: story).merge(
          downloaded_at: downloaded_at.iso8601,
          media_filename: filename,
          media_content_type: content_type,
          media_bytes: bytes.bytesize
        )
      )

      downloaded_event.media.attach(io: StringIO.new(bytes), filename: filename, content_type: content_type)
      InstagramProfileEvent.broadcast_story_archive_refresh!(account: account)
      downloaded_count += 1
    end

    attach_media_to_event(upload_event, bytes: bytes, filename: filename, content_type: content_type)
    # Video stories additionally get a still preview image (best-effort).
    ensure_story_preview_image!(
      event: downloaded_event,
      story: story,
      media_bytes: bytes,
      media_content_type: content_type,
      user_agent: account.user_agent
    )
    ingested_story = ingest_story_for_processing(
      account: account,
      profile: profile,
      story: story,
      downloaded_event: downloaded_event,
      bytes: bytes,
      content_type: content_type,
      filename: filename,
      force_reprocess: force
    )

    analysis = analyze_story_for_comments(
      account: account,
      profile: profile,
      story: story,
      analyzable: downloaded_event,
      media_fingerprint: media_fingerprint_for_story(story: story, bytes: bytes, content_type: content_type),
      bytes: bytes,
      content_type: content_type
    )

    # analyze_story_for_comments returns ok:false on any analyzer problem;
    # such stories are silently left unanalyzed.
    next unless analysis[:ok]

    analyzed_at = Time.current
    profile.record_event!(
      kind: "story_analyzed",
      external_id: "story_analyzed:#{story_id}:#{analyzed_at.utc.iso8601(6)}",
      occurred_at: analyzed_at,
      metadata: base_story_metadata(profile: profile, story: story).merge(
        analyzed_at: analyzed_at.iso8601,
        ai_provider: analysis[:provider],
        ai_model: analysis[:model],
        ai_image_description: analysis[:image_description],
        ai_comment_suggestions: analysis[:comment_suggestions],
        instagram_story_id: ingested_story&.id
      )
    )
    analyzed_count += 1

    # Optional auto-reply: gate through story_reply_decision, then either
    # send a reply or record why it was skipped.
    if auto_reply_enabled
      decision = story_reply_decision(analysis: analysis, profile: profile, story_id: story_id)

      if decision[:queue]
        queued = queue_story_reply!(
          account: account,
          profile: profile,
          story: story,
          analysis: analysis,
          downloaded_event: downloaded_event
        )
        reply_queued_count += 1 if queued
      else
        profile.record_event!(
          kind: "story_reply_skipped",
          external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
          occurred_at: Time.current,
          metadata: base_story_metadata(profile: profile, story: story).merge(
            skip_reason: decision[:reason],
            relevant: analysis[:relevant],
            author_type: analysis[:author_type],
            suggestions_count: Array(analysis[:comment_suggestions]).length
          )
        )
      end
    end
  rescue StandardError => e
    # One bad story must not abort the run: record and continue.
    story_failures << {
      story_id: story_id.presence || story[:story_id].to_s,
      error_class: e.class.name,
      error_message: e.message.to_s.byteslice(0, 220)
    }
    Ops::StructuredLogger.warn(
      event: "profile_story_sync.story_failed",
      payload: {
        active_job_id: job_id,
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        profile_username: profile.username,
        story_id: story_id.presence || story[:story_id].to_s,
        error_class: e.class.name,
        error_message: e.message.to_s
      }
    )
    next
  end

  Ops::StructuredLogger.info(
    event: "profile_story_sync.completed",
    payload: {
      active_job_id: job_id,
      instagram_account_id: account.id,
      instagram_profile_id: profile.id,
      profile_username: profile.username,
      stories_found: stories.size,
      downloaded: downloaded_count,
      reused_downloads: reused_download_count,
      analyzed: analyzed_count,
      replies_queued: reply_queued_count,
      failed_story_count: story_failures.length
    }
  )

  Turbo::StreamsChannel.broadcast_append_to(
    account,
    target: "notifications",
    partial: "shared/notification",
    locals: { kind: "notice", message: "Story sync completed for #{profile.username}. Stories: #{stories.size}, downloaded: #{downloaded_count}, reused: #{reused_download_count}, analyzed: #{analyzed_count}, replies queued: #{reply_queued_count}, failed: #{story_failures.length}." }
  )

  action_log.mark_succeeded!(
    extra_metadata: {
      stories_found: stories.size,
      downloaded: downloaded_count,
      reused_downloads: reused_download_count,
      analyzed: analyzed_count,
      replies_queued: reply_queued_count,
      failed_story_count: story_failures.length,
      failed_stories: story_failures.first(15)
    },
    log_text: "Synced #{stories.size} stories (downloaded: #{downloaded_count}, reused: #{reused_download_count}, analyzed: #{analyzed_count}, replies queued: #{reply_queued_count}, failed: #{story_failures.length})"
  )
rescue StandardError => e
  # Method-level failure: log, notify (if the account resolved), mark the
  # action log failed, and re-raise so ActiveJob retry/error handling runs.
  Ops::StructuredLogger.error(
    event: "profile_story_sync.failed",
    payload: {
      active_job_id: job_id,
      instagram_account_id: account&.id,
      instagram_profile_id: profile&.id,
      profile_username: profile&.username,
      error_class: e.class.name,
      error_message: e.message.to_s
    }
  )
  Turbo::StreamsChannel.broadcast_append_to(
    account,
    target: "notifications",
    partial: "shared/notification",
    locals: { kind: "alert", message: "Story sync failed: #{e.message}" }
  ) if account
  action_log&.mark_failed!(error_message: e.message, extra_metadata: { active_job_id: job_id })
  raise
end
-
-
private
-
-
# Returns the action log referenced by the caller when it exists; otherwise
# creates a fresh "queued" log row describing this job run.
def find_or_create_action_log(account:, profile:, action:, profile_action_log_id:)
  logs = profile.instagram_profile_action_logs
  if profile_action_log_id.present?
    existing = logs.find_by(id: profile_action_log_id)
    return existing if existing
  end

  logs.create!(
    instagram_account: account,
    action: action,
    status: "queued",
    trigger_source: "job",
    occurred_at: Time.current,
    active_job_id: job_id,
    queue_name: queue_name,
    metadata: { created_by: self.class.name }
  )
end
-
-
# Merges freshly fetched profile details into the stored profile, keeping the
# existing value for any field the fetch left blank, then recomputes the
# profile's last-active timestamp.
def sync_profile_snapshot!(profile:, details:)
  merged_attributes = {
    display_name: details[:display_name].presence || profile.display_name,
    profile_pic_url: details[:profile_pic_url].presence || profile.profile_pic_url,
    ig_user_id: details[:ig_user_id].presence || profile.ig_user_id,
    bio: details[:bio].presence || profile.bio,
    last_post_at: details[:last_post_at].presence || profile.last_post_at
  }
  profile.update!(merged_attributes)
  profile.recompute_last_active!
  profile.save!
end
-
-
# Canonical metadata hash stored on every story-related profile event.
# Values are taken directly from the story payload plus a small denormalized
# snapshot of the owning profile, so events remain self-describing even if
# the profile row changes later.
def base_story_metadata(profile:, story:)
  {
    source: "instagram_story_reel_api",
    story_id: story[:story_id],
    media_type: story[:media_type],
    media_url: story[:media_url],
    image_url: story[:image_url],
    video_url: story[:video_url],
    primary_media_source: story[:primary_media_source],
    primary_media_index: story[:primary_media_index],
    media_variants_count: Array(story[:media_variants]).length,
    # Variants are compacted/truncated to keep metadata rows small.
    carousel_media: compact_story_media_variants(story[:carousel_media]),
    can_reply: story[:can_reply],
    can_reshare: story[:can_reshare],
    owner_user_id: story[:owner_user_id],
    owner_username: story[:owner_username],
    api_has_external_profile_indicator: story[:api_has_external_profile_indicator],
    api_external_profile_reason: story[:api_external_profile_reason],
    api_external_profile_targets: story[:api_external_profile_targets],
    api_should_skip: story[:api_should_skip],
    caption: story[:caption],
    permalink: story[:permalink],
    upload_time: story[:taken_at]&.iso8601,
    expiring_at: story[:expiring_at]&.iso8601,
    # Profile snapshot at event time; bio is flattened and capped at 260 bytes.
    profile_context: {
      username: profile.username,
      display_name: profile.display_name,
      can_message: profile.can_message,
      tags: profile.profile_tags.pluck(:name).sort,
      bio: profile.bio.to_s.tr("\n", " ").byteslice(0, 260)
    }
  }
end
-
-
# Normalizes up to eight carousel media entries into compact hashes, reading
# both symbol and string keys and dropping entries without a media URL.
# Returns [] on any error — this data is purely informational.
def compact_story_media_variants(variants)
  Array(variants).take(8).each_with_object([]) do |entry, compacted|
    data = entry.is_a?(Hash) ? entry : {}
    media_url = data[:media_url] || data["media_url"]
    next if media_url.to_s.blank?

    compacted << {
      source: (data[:source] || data["source"]).to_s.presence,
      index: data[:index] || data["index"],
      media_pk: (data[:media_pk] || data["media_pk"]).to_s.presence,
      media_type: (data[:media_type] || data["media_type"]).to_s.presence,
      media_url: media_url.to_s,
      image_url: (data[:image_url] || data["image_url"]).to_s.presence,
      video_url: (data[:video_url] || data["video_url"]).to_s.presence,
      width: data[:width] || data["width"],
      height: data[:height] || data["height"]
    }.compact
  end
rescue StandardError
  []
end
-
-
# True when the profile carries either spelling of the automatic-reply tag.
def automatic_reply_enabled?(profile)
  profile.profile_tags.exists?(name: [ "automatic_reply", "automatic reply" ])
end
-
-
# True when a "story_uploaded" event was already recorded for this story id.
def already_processed_story?(profile:, story_id:)
  profile.instagram_profile_events.exists?(kind: "story_uploaded", external_id: "story_uploaded:#{story_id}")
end
-
-
# Best-effort: attaches the media bytes to the event unless it already has an
# attachment. Attachment errors are swallowed — archival must not break sync.
def attach_media_to_event(event, bytes:, filename:, content_type:)
  return if event.nil? || event.media.attached?

  event.media.attach(io: StringIO.new(bytes), filename: filename, content_type: content_type)
rescue StandardError
  nil
end
-
-
# Runs the AI analyzer over one story and normalizes its output.
#
# Returns { ok: false } when the analyzer does not yield a Hash analysis or
# when ANY error occurs — the rescue deliberately swallows failures so one
# bad story cannot abort the sync; callers treat ok:false as "skip".
def analyze_story_for_comments(account:, profile:, story:, analyzable:, media_fingerprint:, bytes:, content_type:)
  media_payload = build_media_payload(story: story, bytes: bytes, content_type: content_type)
  payload = build_story_payload(profile: profile, story: story)

  # Stories reuse the generic "post" analysis purpose.
  run = Ai::Runner.new(account: account).analyze!(
    purpose: "post",
    analyzable: analyzable,
    payload: payload,
    media: media_payload,
    media_fingerprint: media_fingerprint
  )

  analysis = run.dig(:result, :analysis)
  return { ok: false } unless analysis.is_a?(Hash)

  # Suggestions are capped at 8; string keys come from the AI JSON response.
  {
    ok: true,
    provider: run[:provider].key,
    model: run.dig(:result, :model),
    relevant: analysis["relevant"],
    author_type: analysis["author_type"],
    image_description: analysis["image_description"].to_s.presence,
    comment_suggestions: Array(analysis["comment_suggestions"]).first(8)
  }
rescue StandardError
  { ok: false }
end
-
-
# SHA-256 fingerprint for deduplicating analyses: hashes the media bytes when
# available, otherwise the first present URL/content-type string. Returns nil
# when there is nothing to hash.
def media_fingerprint_for_story(story:, bytes:, content_type:)
  return Digest::SHA256.hexdigest(bytes) if bytes.present?

  seed_candidates = [
    story[:media_url].to_s,
    story[:image_url].to_s,
    story[:video_url].to_s,
    content_type.to_s
  ]
  seed = seed_candidates.find(&:present?)
  seed.blank? ? nil : Digest::SHA256.hexdigest(seed)
end
-
-
# Ensures a video story event carries a still preview image.
#
# Strategy: try a remote preview URL from the story payload first; fall back
# to extracting the video's first frame via VideoThumbnailService. Returns
# false for non-video media, events without attached media, or when both
# strategies fail; true when a preview already exists or was attached.
# Entirely best-effort: any error is logged and reported as false.
def ensure_story_preview_image!(event:, story:, media_bytes:, media_content_type:, user_agent:)
  return false unless event&.media&.attached?
  return false unless event.media.blob&.content_type.to_s.start_with?("video/")
  return true if event.preview_image.attached?

  preview_url = preferred_story_preview_url(story: story)
  if preview_url.present?
    downloaded = download_preview_image(url: preview_url, user_agent: user_agent)
    if downloaded
      attach_preview_image_bytes!(
        event: event,
        image_bytes: downloaded[:bytes],
        content_type: downloaded[:content_type],
        filename: downloaded[:filename]
      )
      stamp_story_preview_metadata!(event: event, source: "remote_image_url")
      return true
    end
  end

  # Fallback: extract the first frame from the video bytes we already hold.
  extracted = VideoThumbnailService.new.extract_first_frame(
    video_bytes: media_bytes.to_s.b,
    reference_id: "story_event_#{event.id}",
    content_type: media_content_type
  )
  return false unless extracted[:ok]

  attach_preview_image_bytes!(
    event: event,
    image_bytes: extracted[:image_bytes],
    content_type: extracted[:content_type],
    filename: extracted[:filename]
  )
  stamp_story_preview_metadata!(event: event, source: "ffmpeg_first_frame")
  true
rescue StandardError => e
  # Preview images are cosmetic; never fail the sync over one.
  Rails.logger.warn("[SyncInstagramProfileStoriesJob] preview attach failed event_id=#{event&.id}: #{e.class}: #{e.message}")
  false
end
-
-
# Picks the first usable preview-image URL: top-level image/thumbnail/preview
# fields first, then any carousel entry's image_url (symbol or string key).
# Returns nil when nothing is present or on any error.
def preferred_story_preview_url(story:)
  urls = [
    story[:image_url].to_s,
    story[:thumbnail_url].to_s,
    story[:preview_image_url].to_s
  ]

  Array(story[:carousel_media]).each do |entry|
    next unless entry.is_a?(Hash)

    urls << entry[:image_url].to_s
    urls << entry["image_url"].to_s
  end

  urls.map(&:strip).find(&:present?)
rescue StandardError
  nil
end
-
-
# Fetches a candidate preview image over HTTP(S), following at most
# +redirects_left+ redirects.
#
# Returns { bytes:, content_type:, filename: } on success, or nil on ANY
# problem (bad URL, oversized body, HTML error page, non-image content type,
# wrong magic bytes). Deliberately never raises — preview download is
# best-effort.
def download_preview_image(url:, user_agent:, redirects_left: 3)
  uri = URI.parse(url)
  return nil unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 8
  http.read_timeout = 20

  req = Net::HTTP::Get.new(uri.request_uri)
  req["Accept"] = "image/*,*/*;q=0.8"
  req["User-Agent"] = user_agent.to_s.presence || "Mozilla/5.0"
  req["Referer"] = Instagram::Client::INSTAGRAM_BASE_URL
  res = http.request(req)

  if res.is_a?(Net::HTTPRedirection) && res["location"].present?
    return nil if redirects_left.to_i <= 0

    redirected_url = normalize_redirect_url(base_uri: uri, location: res["location"])
    return nil if redirected_url.blank?

    return download_preview_image(url: redirected_url, user_agent: user_agent, redirects_left: redirects_left.to_i - 1)
  end

  return nil unless res.is_a?(Net::HTTPSuccess)

  body = res.body.to_s.b
  return nil if body.bytesize <= 0 || body.bytesize > MAX_PREVIEW_IMAGE_BYTES
  # CDNs sometimes return HTTP 200 with an HTML error page.
  return nil if html_payload?(body)

  content_type = res["content-type"].to_s.split(";").first.to_s
  return nil unless content_type.start_with?("image/")

  # Raises (caught below) when magic bytes contradict the declared type.
  validate_known_signature!(body: body, content_type: content_type)
  ext = extension_for_content_type(content_type: content_type)

  {
    bytes: body,
    content_type: content_type,
    filename: "story_preview_#{Digest::SHA256.hexdigest(url)[0, 12]}.#{ext}"
  }
rescue StandardError
  nil
end
-
-
# Uploads the preview image bytes as a blob (skipping content-type
# identification) and attaches it to the event's preview_image.
def attach_preview_image_bytes!(event:, image_bytes:, content_type:, filename:)
  resolved_content_type = content_type.to_s.presence || "image/jpeg"
  blob = ActiveStorage::Blob.create_and_upload!(
    io: StringIO.new(image_bytes),
    filename: filename,
    content_type: resolved_content_type,
    identify: false
  )
  attach_preview_blob_to_event!(event: event, blob: blob)
end
-
-
# Attaches the blob as the event's preview image; when a preview attachment
# already exists, swaps its blob in place (unless it is already this blob).
def attach_preview_blob_to_event!(event:, blob:)
  return if blob.nil?

  existing = event.preview_image.attached? ? event.preview_image.attachment : nil
  if existing
    existing.update!(blob: blob) unless existing.blob_id == blob.id
  else
    event.preview_image.attach(blob)
  end
end
-
-
# Records on the event's metadata that a preview image was attached, where it
# came from, and when. Errors are swallowed — this is bookkeeping only.
def stamp_story_preview_metadata!(event:, source:)
  base = event.metadata.is_a?(Hash) ? event.metadata.deep_dup : {}
  stamped = base.merge(
    "preview_image_status" => "attached",
    "preview_image_source" => source.to_s,
    "preview_image_attached_at" => Time.current.utc.iso8601(3)
  )
  event.update!(metadata: stamped)
rescue StandardError
  nil
end
-
-
# Assembles the payload handed to the AI analyzer for one story. Stories are
# mapped onto the analyzer's generic "post" schema (hence nil engagement
# counters) and enriched with profile history so suggested replies stay novel.
def build_story_payload(profile:, story:)
  story_history = recent_story_history_context(profile: profile)
  history_narrative = profile.history_narrative_text(max_chunks: 3)
  history_chunks = profile.history_narrative_chunks(max_chunks: 6)
  # Up to 5 recent posts, including prior AI analysis when present.
  recent_post_context = profile.instagram_profile_posts.recent_first.limit(5).map do |p|
    {
      shortcode: p.shortcode,
      caption: p.caption.to_s,
      taken_at: p.taken_at&.iso8601,
      image_description: p.analysis.is_a?(Hash) ? p.analysis["image_description"] : nil,
      topics: p.analysis.is_a?(Hash) ? Array(p.analysis["topics"]).first(6) : []
    }
  end
  # Lightweight activity trail: last 20 event kinds with timestamps.
  recent_event_context = profile.instagram_profile_events.order(detected_at: :desc).limit(20).pluck(:kind, :occurred_at).map do |kind, occurred_at|
    { kind: kind, occurred_at: occurred_at&.iso8601 }
  end

  {
    # Story fields reshaped into the "post" schema; stories have no
    # likes/comments, so those are nil/empty.
    post: {
      shortcode: story[:story_id],
      caption: story[:caption],
      taken_at: story[:taken_at]&.iso8601,
      permalink: story[:permalink],
      likes_count: nil,
      comments_count: nil,
      comments: []
    },
    author_profile: {
      username: profile.username,
      display_name: profile.display_name,
      bio: profile.bio,
      can_message: profile.can_message,
      tags: profile.profile_tags.pluck(:name).sort,
      recent_posts: recent_post_context,
      recent_profile_events: recent_event_context,
      recent_story_history: story_history,
      historical_narrative_text: history_narrative,
      historical_narrative_chunks: history_chunks
    },
    # Constraints forwarded to the analyzer prompt.
    rules: {
      require_manual_review: true,
      style: "gen_z_light",
      context: "story_reply_suggestion",
      only_if_relevant: true,
      diversity_requirement: "Prefer novel comments and avoid repeating previous story replies."
    }
  }
end
-
-
# Decides whether a reply should be queued for this story's analysis.
# Returns { queue: Boolean, reason: String }; the reason is recorded on the
# skip event when queue is false.
def story_reply_decision(analysis:, profile:, story_id:)
  deny = ->(reason) { { queue: false, reason: reason } }

  return deny.call("already_sent") if story_reply_already_sent?(profile: profile, story_id: story_id)
  return deny.call("official_messaging_not_configured") unless official_messaging_service.configured?

  usable_suggestions = Array(analysis[:comment_suggestions]).map(&:to_s).reject(&:blank?)
  return deny.call("no_comment_suggestions") if usable_suggestions.empty?
  return deny.call("not_relevant") unless analysis[:relevant] == true

  kind = analysis[:author_type].to_s
  unless %w[personal_user friend relative unknown].include?(kind)
    return deny.call("author_type_#{kind.presence || 'missing'}_not_allowed")
  end

  { queue: true, reason: "eligible_for_reply" }
end
-
-
# True when a reply event was already recorded for this story id.
def story_reply_already_sent?(profile:, story_id:)
  profile.instagram_profile_events.exists?(kind: "story_reply_sent", external_id: "story_reply_sent:#{story_id}")
end
-
-
# Sends one AI-suggested reply to the profile's story via the official
# messaging API and records the outcome (message row + profile event).
#
# Returns true when a reply was sent; false when no usable suggestion exists
# or anything in the send/bookkeeping path failed.
# NOTE(review): if record_event! raises AFTER the "sent" message row was
# created, the rescue adds a second, "failed" row for the same reply —
# confirm this double entry is acceptable.
def queue_story_reply!(account:, profile:, story:, analysis:, downloaded_event: nil)
  story_id = story[:story_id].to_s
  # Pick the suggestion least similar to previously posted replies.
  suggestion = select_unique_story_comment(profile: profile, suggestions: Array(analysis[:comment_suggestions]))
  return false if suggestion.blank?

  result = official_messaging_service.send_text!(
    recipient_id: profile.ig_user_id.presence || profile.username,
    text: suggestion,
    context: {
      source: "story_auto_reply",
      story_id: story_id
    }
  )

  message = account.instagram_messages.create!(
    instagram_profile: profile,
    direction: "outgoing",
    body: suggestion,
    status: "sent",
    sent_at: Time.current
  )

  profile.record_event!(
    kind: "story_reply_sent",
    external_id: "story_reply_sent:#{story_id}",
    occurred_at: Time.current,
    metadata: base_story_metadata(profile: profile, story: story).merge(
      ai_reply_text: suggestion,
      auto_reply: true,
      instagram_message_id: message.id,
      provider_message_id: result[:provider_message_id]
    )
  )
  attach_reply_comment_to_downloaded_event!(downloaded_event: downloaded_event, comment_text: suggestion)
  true
rescue StandardError => e
  # Record the failure as an outgoing message (only if a suggestion had been
  # chosen) and report "not queued" instead of raising.
  account.instagram_messages.create!(
    instagram_profile: profile,
    direction: "outgoing",
    body: suggestion.to_s,
    status: "failed",
    error_message: e.message.to_s
  ) if suggestion.present?
  false
end
-
-
# Lazily builds and memoizes the official messaging API client.
def official_messaging_service
  return @official_messaging_service if @official_messaging_service

  @official_messaging_service = Messaging::IntegrationService.new
end
-
-
# Stores the posted reply text in the download event's metadata so the story
# archive can display it alongside the media.
def attach_reply_comment_to_downloaded_event!(downloaded_event:, comment_text:)
  return unless downloaded_event.present? && comment_text.present?

  updated_metadata = downloaded_event.metadata.is_a?(Hash) ? downloaded_event.metadata.deep_dup : {}
  updated_metadata["reply_comment"] = comment_text.to_s
  downloaded_event.update!(metadata: updated_metadata)
end
-
-
# Archives the media of a story that is intentionally NOT being processed
# (e.g. an external-profile reshare) so it still appears in the story archive.
# Reuses an existing download or cached media when available.
#
# Returns { downloaded:, reused:, event: }. Any error is swallowed and
# reported as "nothing downloaded" — this path is strictly best-effort.
def download_skipped_story!(account:, profile:, story:, skip_reason:)
  story_id = story[:story_id].to_s
  existing_event = latest_story_download_event(profile: profile, story_id: story_id)
  if existing_event&.media&.attached?
    return { downloaded: false, reused: true, event: existing_event }
  end
  reused_media = load_cached_story_media_for_profile(
    account: account,
    profile: profile,
    story: story,
    skip_reason: skip_reason
  )
  return { downloaded: false, reused: true, event: reused_media[:event] } if reused_media

  media_url = story[:media_url].to_s.strip
  return { downloaded: false, reused: false, event: nil } if media_url.blank?

  bytes, content_type, filename = download_story_media(url: media_url, user_agent: account.user_agent)
  downloaded_at = Time.current
  # The recorded event carries skipped/skip_reason so the archive can
  # distinguish it from normally processed downloads.
  event = profile.record_event!(
    kind: "story_downloaded",
    external_id: "story_downloaded:#{story_id}:#{downloaded_at.utc.iso8601(6)}",
    occurred_at: downloaded_at,
    metadata: base_story_metadata(profile: profile, story: story).merge(
      skipped: true,
      skip_reason: skip_reason.to_s,
      downloaded_at: downloaded_at.iso8601,
      media_filename: filename,
      media_content_type: content_type,
      media_bytes: bytes.bytesize
    )
  )
  event.media.attach(io: StringIO.new(bytes), filename: filename, content_type: content_type)
  InstagramProfileEvent.broadcast_story_archive_refresh!(account: account)
  { downloaded: true, reused: false, event: event }
rescue StandardError
  { downloaded: false, reused: false, event: nil }
end
-
-
# Summarizes the 25 most recent story analysis/reply events as compact hashes
# (nil-valued keys removed) for inclusion in the AI payload.
def recent_story_history_context(profile:)
  recent_events = profile.instagram_profile_events
                         .where(kind: [ "story_analyzed", "story_reply_sent", "story_comment_posted_via_feed" ])
                         .order(detected_at: :desc, id: :desc)
                         .limit(25)

  recent_events.map do |event|
    meta = event.metadata.is_a?(Hash) ? event.metadata : {}
    {
      kind: event.kind,
      occurred_at: event.occurred_at&.iso8601 || event.detected_at&.iso8601,
      story_id: meta["story_id"].to_s.presence,
      image_description: meta["ai_image_description"].to_s.presence,
      posted_comment: meta["ai_reply_text"].to_s.presence || meta["comment_text"].to_s.presence
    }.compact
  end
end
-
-
# Chooses the suggestion least similar to the profile's last 40 posted
# replies: prefers the first candidate below the 0.72 similarity cutoff,
# falling back to the most novel one. Returns nil when no candidate exists.
def select_unique_story_comment(profile:, suggestions:)
  pool = Array(suggestions).map { |s| s.to_s.strip }.reject(&:blank?)
  return nil if pool.empty?

  past_replies = profile.instagram_profile_events
                        .where(kind: [ "story_reply_sent", "story_comment_posted_via_feed" ])
                        .order(detected_at: :desc, id: :desc)
                        .limit(40)
                        .map { |e| e.metadata.is_a?(Hash) ? (e.metadata["ai_reply_text"].to_s.presence || e.metadata["comment_text"].to_s) : "" }
                        .reject(&:blank?)

  return pool.first if past_replies.empty?

  by_novelty = pool.sort_by do |candidate|
    past_replies.map { |past| text_similarity(candidate, past) }.max.to_f
  end
  by_novelty.find { |c| past_replies.all? { |past| text_similarity(c, past) < 0.72 } } || by_novelty.first
end
-
-
# Token-overlap similarity in [0.0, 1.0]: shared unique tokens divided by the
# larger token set's size. 0.0 when either side has no tokens.
def text_similarity(a, b)
  tokens_a = tokenize(a)
  tokens_b = tokenize(b)
  return 0.0 if tokens_a.empty? || tokens_b.empty?

  shared = (tokens_a & tokens_b).length
  shared.fdiv([ tokens_a.length, tokens_b.length ].max)
end
-
-
# Lowercase alphanumeric tokens, deduplicated, in first-seen order.
def tokenize(text)
  normalized = text.to_s.downcase
  normalized.scan(/[a-z0-9]+/).tap(&:uniq!)
end
-
-
# Shapes the media part of the AI payload. Videos above the inline cap are
# passed without bytes; small images additionally get a base64 data URL.
def build_media_payload(story:, bytes:, content_type:)
  if story[:media_type].to_s == "video"
    inline_ok = bytes.bytesize <= MAX_INLINE_VIDEO_BYTES
    {
      type: "video",
      content_type: content_type,
      bytes: inline_ok ? bytes : nil
    }
  else
    result = {
      type: "image",
      content_type: content_type,
      bytes: bytes
    }
    if bytes.bytesize <= MAX_INLINE_IMAGE_BYTES
      result[:image_data_url] = "data:#{content_type};base64,#{Base64.strict_encode64(bytes)}"
    end
    result
  end
end
-
-
# Fetches story media over HTTP(S) and returns [bytes, content_type, filename].
#
# Fix: the previous version recursed on every redirect with no depth limit,
# so a redirect cycle would loop until stack exhaustion. Redirects are now
# capped by +redirects_left+ (a new keyword with a default, so existing
# callers are unaffected) — mirroring download_preview_image's budget.
#
# @raise [RuntimeError] on invalid URLs, non-success responses, empty bodies,
#   or when the redirect budget is exhausted.
def download_story_media(url:, user_agent:, redirects_left: 5)
  uri = URI.parse(url)
  raise "Invalid story media URL" unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 10
  http.read_timeout = 30

  req = Net::HTTP::Get.new(uri.request_uri)
  req["User-Agent"] = user_agent.presence || "Mozilla/5.0"
  req["Accept"] = "*/*"
  req["Referer"] = "https://www.instagram.com/"

  res = http.request(req)

  if res.is_a?(Net::HTTPRedirection) && res["location"].present?
    raise "Too many redirects for story media URL" if redirects_left.to_i <= 0

    return download_story_media(url: res["location"], user_agent: user_agent, redirects_left: redirects_left.to_i - 1)
  end

  raise "HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess)

  bytes = res.body.to_s
  raise "Empty story media body" if bytes.blank?

  content_type = res["content-type"].to_s.split(";").first.presence || "application/octet-stream"
  ext = extension_for_content_type(content_type: content_type)
  # Filename is derived from a digest of path + size so repeated downloads of
  # the same asset get a stable name.
  digest = Digest::SHA256.hexdigest("#{uri.path}-#{bytes.bytesize}")[0, 12]
  filename = "story_#{digest}.#{ext}"

  [ bytes, content_type, filename ]
end
-
-
# Maps a MIME content type to a file extension; unknown types get "bin".
def extension_for_content_type(content_type:)
  mapping = {
    "jpeg" => "jpg",
    "png" => "png",
    "webp" => "webp",
    "mp4" => "mp4",
    "quicktime" => "mov"
  }
  _needle, ext = mapping.find { |needle, _| content_type.include?(needle) }
  ext || "bin"
end
-
-
# Resolves a redirect Location (absolute or relative) against the request URI
# and returns the absolute URL string, or nil when the result is not HTTP(S)
# or cannot be parsed.
def normalize_redirect_url(base_uri:, location:)
  resolved = URI.parse(URI.join(base_uri.to_s, location.to_s).to_s)
  return resolved.to_s if resolved.is_a?(URI::HTTP) || resolved.is_a?(URI::HTTPS)

  nil
rescue URI::InvalidURIError, ArgumentError
  nil
end
-
-
# Heuristic: does the response body look like an HTML page instead of media?
# Only inspects the first 4 KiB, case-insensitively.
def html_payload?(body)
  head = body.to_s.byteslice(0, 4096).to_s.downcase
  return true if head.start_with?("<!doctype html")

  head.include?("<html")
end
-
-
# Verifies that the downloaded bytes start with the magic number implied by
# the reported content type. Blank or generic (octet-stream) types are
# accepted without checks. Raises a RuntimeError naming the mismatched format.
def validate_known_signature!(body:, content_type:)
  type = content_type.to_s.downcase
  return if type.blank?
  return if type.include?("octet-stream")

  if type.include?("jpeg")
    raise "invalid jpeg signature" unless body.start_with?("\xFF\xD8".b)
  elsif type.include?("png")
    raise "invalid png signature" unless body.start_with?("\x89PNG\r\n\x1A\n".b)
  elsif type.include?("gif")
    raise "invalid gif signature" unless body.start_with?("GIF87a".b) || body.start_with?("GIF89a".b)
  elsif type.include?("webp")
    # WEBP is a RIFF container: "RIFF" at offset 0, "WEBP" at offset 8.
    riff_container = body.bytesize >= 12 && body.byteslice(0, 4) == "RIFF" && body.byteslice(8, 4) == "WEBP"
    raise "invalid webp signature" unless riff_container
  elsif type.start_with?("video/")
    # ISO media files carry an "ftyp" box at offset 4.
    ftyp_box = body.bytesize >= 12 && body.byteslice(4, 4) == "ftyp"
    raise "invalid video signature" unless ftyp_box
  end
end
-
-
# Hands downloaded story media to StoryIngestionService for persistence and
# follow-up processing. Best-effort: any failure is logged with the story id
# and nil is returned so one bad story does not abort the sync run.
def ingest_story_for_processing(account:, profile:, story:, downloaded_event:, bytes:, content_type:, filename:, force_reprocess:)
  StoryIngestionService.new(account: account, profile: profile).ingest!(
    story: story,
    source_event: downloaded_event,
    bytes: bytes,
    content_type: content_type,
    filename: filename,
    force_reprocess: force_reprocess
  )
rescue StandardError => e
  Rails.logger.warn("[SyncInstagramProfileStoriesJob] story ingestion failed story_id=#{story[:story_id]}: #{e.class}: #{e.message}")
  nil
end
-
-
# Finds the most recent "story_downloaded" event for the given story id that
# still has media attached, newest first by detected_at (id as tiebreaker).
# external_id is formatted "story_downloaded:<story_id>:<timestamp>", hence
# the prefix LIKE match.
def latest_story_download_event(profile:, story_id:)
  profile.instagram_profile_events
    .joins(:media_attachment)
    .with_attached_media
    .where(kind: "story_downloaded")
    .where("external_id LIKE ?", "story_downloaded:#{story_id}:%")
    .order(detected_at: :desc, id: :desc)
    .first
end
-
-
# Loads the attached media of a story event into memory.
# Returns { event:, bytes:, content_type:, filename: } or nil when nothing is
# attached or the blob cannot be downloaded (errors are swallowed).
def load_existing_story_media(event:)
  return nil unless event&.media&.attached?

  attached_blob = event.media.blob
  fallback_name = "story_#{event.id}.bin"

  {
    event: event,
    bytes: attached_blob.download,
    content_type: attached_blob.content_type.to_s.presence || "application/octet-stream",
    filename: attached_blob.filename.to_s.presence || fallback_name
  }
rescue StandardError
  nil
end
-
-
# Tries to satisfy a story download from media already stored for ANOTHER
# profile (same story id), avoiding a re-download. On a cache hit it records
# a synthetic "story_downloaded" event for this profile pointing at the
# cached blob, then loads that media into memory.
# Returns the same hash shape as load_existing_story_media, or nil on miss
# or any failure (failures are logged, not raised).
def load_cached_story_media_for_profile(account:, profile:, story:, skip_reason: nil)
  story_id = story[:story_id].to_s.strip
  return nil if story_id.blank?

  # Only look at other profiles — this profile's own copy is handled elsewhere.
  cache_hit = find_cached_story_media(story_id: story_id, excluding_profile_id: profile.id)
  return nil unless cache_hit

  event = build_cached_story_download_event(
    account: account,
    profile: profile,
    story: story,
    story_id: story_id,
    blob: cache_hit[:blob],
    cache_source: cache_hit[:source],
    cache_source_id: cache_hit[:source_id],
    skip_reason: skip_reason
  )
  return nil unless event

  load_existing_story_media(event: event)
rescue StandardError => e
  Rails.logger.warn("[SyncInstagramProfileStoriesJob] cached media reuse failed for story_id=#{story_id}: #{e.class}: #{e.message}")
  nil
end
-
-
# Looks up an already-stored blob for the given story id on any OTHER profile.
# Preference order: an InstagramStory record first, then a "story_downloaded"
# profile event. Returns { blob:, source:, source_id: } or nil.
def find_cached_story_media(story_id:, excluding_profile_id:)
  cached_story = InstagramStory
    .joins(:media_attachment)
    .where(story_id: story_id)
    .where.not(instagram_profile_id: excluding_profile_id)
    .order(taken_at: :desc, id: :desc)
    .first
  if cached_story&.media&.attached?
    return { blob: cached_story.media.blob, source: "instagram_story", source_id: cached_story.id }
  end

  # Fall back to download events; external_id embeds the story id.
  cached_event = InstagramProfileEvent
    .joins(:media_attachment)
    .with_attached_media
    .where(kind: "story_downloaded")
    .where.not(instagram_profile_id: excluding_profile_id)
    .where("external_id LIKE ?", "story_downloaded:#{story_id}:%")
    .order(detected_at: :desc, id: :desc)
    .first
  return nil unless cached_event&.media&.attached?

  { blob: cached_event.media.blob, source: "instagram_profile_event", source_id: cached_event.id }
end
-
-
# Records a "story_downloaded" event for this profile that reuses an
# already-stored blob (no network download), tagging metadata with the cache
# provenance, then broadcasts a story-archive refresh for the account.
# Returns the created event with the cached blob attached.
def build_cached_story_download_event(account:, profile:, story:, story_id:, blob:, cache_source:, cache_source_id:, skip_reason: nil)
  downloaded_at = Time.current
  metadata = base_story_metadata(profile: profile, story: story).merge(
    downloaded_at: downloaded_at.iso8601,
    media_filename: blob.filename.to_s,
    media_content_type: blob.content_type.to_s,
    media_bytes: blob.byte_size.to_i,
    reused_local_cache: true,
    reused_local_cache_source: cache_source.to_s,
    reused_local_cache_source_id: cache_source_id
  )
  # A skip reason marks the event as skipped so downstream consumers ignore it.
  metadata[:skip_reason] = skip_reason.to_s if skip_reason.present?
  metadata[:skipped] = true if skip_reason.present?

  # Microsecond timestamp keeps external_id unique per reuse occurrence.
  event = profile.record_event!(
    kind: "story_downloaded",
    external_id: "story_downloaded:#{story_id}:#{downloaded_at.utc.iso8601(6)}",
    occurred_at: downloaded_at,
    metadata: metadata
  )
  # Attaching an existing blob shares the stored file instead of copying it.
  event.media.attach(blob) unless event.media.attached?
  InstagramProfileEvent.broadcast_story_archive_refresh!(account: account)
  event
end
-
-
# Writes an HTML debug snapshot of one story's metadata and processing state
# to tmp/story_debug_snapshots/, and records a "story_html_snapshot" event
# pointing at the file. Best-effort: any failure is logged and swallowed so
# snapshot capture never fails the surrounding job.
def capture_story_html_snapshot(profile:, story:, story_index:)
  return unless story.present?

  # mkdir_p is idempotent, so no Dir.exist? guard is needed.
  debug_dir = Rails.root.join("tmp", "story_debug_snapshots")
  FileUtils.mkdir_p(debug_dir)

  # Filename carries username, index, story id, and a millisecond timestamp.
  timestamp = Time.current.strftime("%Y%m%d_%H%M%S_%L")
  filename = "#{profile.username}_story_#{story_index}_#{story[:story_id]}_#{timestamp}.html"
  filepath = File.join(debug_dir, filename)

  # BUGFIX: the "Story Index" line previously rendered
  # "#{story_index} / #{Array(story).size}" — Array(hash) converts the story
  # hash into key/value pairs, so the denominator was the metadata entry
  # count, not a story total. The total isn't available here, so show the
  # index alone.
  html_content = <<~HTML
    <!DOCTYPE html>
    <html>
    <head>
      <title>Story Debug Snapshot - #{profile.username} - Story #{story_index}</title>
      <style>
        body { font-family: Arial, sans-serif; margin: 20px; }
        .header { background: #f0f0f0; padding: 10px; border-radius: 5px; margin-bottom: 20px; }
        .metadata { background: #fff9e6; padding: 10px; border-radius: 5px; margin-bottom: 20px; }
        .analysis { background: #e6f3ff; padding: 10px; border-radius: 5px; margin-bottom: 20px; }
        .events { background: #ffe6e6; padding: 10px; border-radius: 5px; }
        pre { background: #f5f5f5; padding: 10px; border-radius: 3px; overflow-x: auto; }
        .story-id { color: #0066cc; font-weight: bold; }
        .skip-reason { color: #cc0000; font-weight: bold; }
      </style>
    </head>
    <body>
      <div class="header">
        <h1>Story Debug Snapshot</h1>
        <p><strong>Profile:</strong> #{profile.username} (ID: #{profile.id})</p>
        <p><strong>Story Index:</strong> #{story_index}</p>
        <p><strong>Captured At:</strong> #{Time.current.iso8601}</p>
      </div>

      <div class="metadata">
        <h2>Story Metadata</h2>
        <pre>#{JSON.pretty_generate(story)}</pre>
      </div>

      <div class="analysis">
        <h2>Processing Analysis</h2>
        <p><strong>Story ID:</strong> <span class="story-id">#{story[:story_id]}</span></p>
        <p><strong>Already Processed:</strong> #{already_processed_story?(profile: profile, story_id: story[:story_id].to_s)}</p>
        <p><strong>Media URL:</strong> #{story[:media_url]}</p>
        <p><strong>Taken At:</strong> #{story[:taken_at]}</p>
        <p><strong>Expiring At:</strong> #{story[:expiring_at]}</p>
      </div>

      <div class="events">
        <h2>Recent Story Events for this Profile</h2>
        <pre>#{JSON.pretty_generate(recent_story_events_for_debug(profile: profile))}</pre>
      </div>
    </body>
    </html>
  HTML

  File.write(filepath, html_content)
  Rails.logger.info "[STORY_DEBUG] HTML snapshot created: #{filepath}"

  # Record snapshot event in the database so the file is discoverable later.
  profile.record_event!(
    kind: "story_html_snapshot",
    external_id: "story_html_snapshot:#{story[:story_id]}:#{timestamp}",
    occurred_at: Time.current,
    metadata: base_story_metadata(profile: profile, story: story).merge(
      snapshot_filename: filename,
      snapshot_path: filepath,
      story_index: story_index,
      captured_at: Time.current.iso8601
    )
  )
rescue StandardError => e
  # Don't fail the entire job if snapshot capture fails.
  Rails.logger.error "[STORY_DEBUG] Failed to capture HTML snapshot: #{e.message}"
end
-
-
# Summarizes the 20 most recent story-related events for the debug snapshot.
# Returns an array of plain hashes with a trimmed metadata slice.
def recent_story_events_for_debug(profile:)
  recent_events = profile.instagram_profile_events
    .where(kind: [ "story_uploaded", "story_viewed", "story_analyzed", "story_skipped_debug" ])
    .order(occurred_at: :desc, id: :desc)
    .limit(20)

  recent_events.map do |event|
    raw_metadata = event.metadata
    summary_metadata =
      if raw_metadata.is_a?(Hash)
        raw_metadata.slice("story_id", "skip_reason", "force_analyze_all", "story_index", "total_stories")
      else
        {}
      end

    {
      id: event.id,
      kind: event.kind,
      external_id: event.external_id,
      occurred_at: event.occurred_at&.iso8601,
      metadata: summary_metadata
    }
  end
end
-
end
-
# Enqueues FetchInstagramProfileDetailsJob for the profiles of an account
# that were synced least recently, then posts a Turbo notification.
class SyncNextProfilesForAccountJob < ApplicationJob
  queue_as :profiles

  # instagram_account_id - account whose profiles are refreshed
  # limit                - max profiles to enqueue (clamped to 1..50)
  def perform(instagram_account_id:, limit: 10)
    account = InstagramAccount.find(instagram_account_id)
    batch_size = limit.to_i.clamp(1, 50)

    # Least-recently-synced first; recently active profiles break ties.
    candidates = account.instagram_profiles
      .order(Arel.sql("COALESCE(last_synced_at, '1970-01-01') ASC, COALESCE(last_active_at, '1970-01-01') DESC, username ASC"))
      .limit(batch_size)

    candidates.each do |profile|
      action_log = profile.instagram_profile_action_logs.create!(
        instagram_account: account,
        action: "fetch_profile_details",
        status: "queued",
        trigger_source: "account_sync_next_profiles",
        occurred_at: Time.current,
        metadata: { requested_by: self.class.name, limit: batch_size }
      )

      queued_job = FetchInstagramProfileDetailsJob.perform_later(
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        profile_action_log_id: action_log.id
      )
      action_log.update!(active_job_id: queued_job.job_id, queue_name: queued_job.queue_name)
    rescue StandardError => e
      # One failed enqueue should not stop the rest of the batch.
      Ops::StructuredLogger.warn(
        event: "sync_next_profiles.profile_enqueue_failed",
        payload: {
          account_id: account.id,
          profile_id: profile.id,
          error_class: e.class.name,
          error_message: e.message
        }
      )
      next
    end

    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "notice", message: "Queued profile sync for next #{candidates.size} profiles." }
    )
  end
end
-
# Fans out SyncInstagramProfileStoriesJob for the profiles of an account
# whose stories were seen least recently, optionally restricted to profiles
# tagged for automatic replies.
class SyncProfileStoriesForAccountJob < ApplicationJob
  queue_as :story_downloads

  # Max number of profiles whose stories are synced per run.
  STORY_BATCH_LIMIT = 10
  # Per-profile story cap, mirroring the downstream job's maximum.
  STORIES_PER_PROFILE = SyncInstagramProfileStoriesJob::MAX_STORIES

  # instagram_account_id   - account whose profiles are scanned (missing account is a no-op)
  # story_limit            - max profiles to enqueue (clamped to 1..STORY_BATCH_LIMIT)
  # stories_per_profile    - stories per profile (clamped to the downstream max)
  # with_comments          - truthy => run in auto-reply mode
  # require_auto_reply_tag - truthy => only profiles carrying an auto-reply tag
  # force_analyze_all      - passed through to force re-analysis
  def perform(
    instagram_account_id:,
    story_limit: STORY_BATCH_LIMIT,
    stories_per_profile: STORIES_PER_PROFILE,
    with_comments: false,
    require_auto_reply_tag: false,
    force_analyze_all: false
  )
    account = InstagramAccount.find_by(id: instagram_account_id)
    unless account
      Ops::StructuredLogger.info(
        event: "sync_profile_stories.skipped_missing_account",
        payload: { instagram_account_id: instagram_account_id }
      )
      return
    end

    limit = story_limit.to_i.clamp(1, STORY_BATCH_LIMIT)
    stories_per_profile_i = stories_per_profile.to_i.clamp(1, SyncInstagramProfileStoriesJob::MAX_STORIES)
    auto_reply = ActiveModel::Type::Boolean.new.cast(with_comments)
    require_tag = ActiveModel::Type::Boolean.new.cast(require_auto_reply_tag)
    force_analyze = ActiveModel::Type::Boolean.new.cast(force_analyze_all)

    # Profiles whose stories were seen longest ago come first.
    scope = account.instagram_profiles
      .order(Arel.sql("COALESCE(last_story_seen_at, '1970-01-01') ASC, COALESCE(last_active_at, '1970-01-01') DESC, username ASC"))
    if require_tag
      # Accept the known spelling variants of the auto-reply tag.
      tagged_profiles = account.instagram_profiles
        .joins(:profile_tags)
        .where(profile_tags: { name: [ "automatic_reply", "automatic reply", "auto_reply", "auto reply" ] })
        .select(:id)
      scope = scope.where(id: tagged_profiles)
    end

    profiles = scope.limit(limit)

    profiles.each do |profile|
      action = auto_reply ? "auto_story_reply" : "sync_stories"
      log = profile.instagram_profile_action_logs.create!(
        instagram_account: account,
        action: action,
        status: "queued",
        trigger_source: auto_reply ? "account_sync_stories_with_comments" : "account_sync_profile_stories",
        occurred_at: Time.current,
        metadata: {
          requested_by: self.class.name,
          story_limit: limit,
          max_stories_per_profile: stories_per_profile_i,
          auto_reply: auto_reply,
          require_auto_reply_tag: require_tag,
          force_analyze_all: force_analyze
        }
      )

      job = SyncInstagramProfileStoriesJob.perform_later(
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        profile_action_log_id: log.id,
        max_stories: stories_per_profile_i,
        force_analyze_all: force_analyze,
        auto_reply: auto_reply,
        require_auto_reply_tag: require_tag
      )
      log.update!(active_job_id: job.job_id, queue_name: job.queue_name)
    rescue StandardError => e
      # One failed enqueue should not stop the rest of the batch.
      Ops::StructuredLogger.warn(
        event: "sync_profile_stories.profile_enqueue_failed",
        payload: {
          account_id: account.id,
          profile_id: profile.id,
          error_class: e.class.name,
          error_message: e.message
        }
      )
      next
    end

    label = auto_reply ? "story sync with auto-reply" : "story sync"
    # BUGFIX: profiles.size counts queued PROFILES, not stories — the old
    # message said "N stories", which was misleading in the UI.
    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "notice", message: "Queued #{label} for #{profiles.size} profiles (max #{STORY_BATCH_LIMIT})." }
    )
  end
end
-
require "set"
-
-
# Recurring per-profile scan: refreshes story activity, applies the profile
# scan policy, collects the latest posts, and enqueues AI analysis for new,
# recent posts. Guarded by a Postgres advisory lock so at most one scan runs
# per profile at a time.
class SyncRecentProfilePostsForProfileJob < ApplicationJob
  # Raised when an upstream failure looks rate-limit-like, so ActiveJob
  # retries it instead of failing permanently.
  class TransientProfileScanError < StandardError; end

  queue_as :post_downloads

  # Tag applied to the profile after any completed scan.
  VISITED_TAG = "profile_posts_scanned".freeze
  # Tag applied when the scan queued new posts for analysis.
  ANALYZED_TAG = "profile_posts_analyzed".freeze
  # Posts older than this many days are not queued for analysis.
  MAX_POST_AGE_DAYS = 5
  # First key of the two-int Postgres advisory lock (second key = profile id).
  PROFILE_SCAN_LOCK_NAMESPACE = 92_347

  retry_on Net::OpenTimeout, Net::ReadTimeout, wait: :polynomially_longer, attempts: 4
  retry_on Errno::ECONNREFUSED, Errno::ECONNRESET, wait: :polynomially_longer, attempts: 4
  retry_on Timeout::Error, wait: :polynomially_longer, attempts: 3
  retry_on TransientProfileScanError, wait: :polynomially_longer, attempts: 3
  # Selenium may not be loaded in every environment; only register the retry
  # when its error class actually resolves.
  selenium_timeout_error = "Selenium::WebDriver::Error::TimeoutError".safe_constantize
  retry_on selenium_timeout_error, wait: :polynomially_longer, attempts: 2 if selenium_timeout_error

  # Runs the full scan pipeline for one profile.
  #
  # instagram_account_id - owning account (raises if missing)
  # instagram_profile_id - profile to scan (raises if missing)
  # posts_limit          - latest posts to fetch (clamped to 1..3)
  # comments_limit       - comments to fetch per post (clamped to 1..20)
  def perform(instagram_account_id:, instagram_profile_id:, posts_limit: 3, comments_limit: 8)
    account = InstagramAccount.find(instagram_account_id)
    profile = account.instagram_profiles.find(instagram_profile_id)
    posts_limit_i = posts_limit.to_i.clamp(1, 3)
    comments_limit_i = comments_limit.to_i.clamp(1, 20)
    # Skip entirely if another worker already holds this profile's lock.
    lock_acquired = claim_profile_scan_lock!(profile_id: profile.id)
    unless lock_acquired
      Ops::StructuredLogger.info(
        event: "profile_scan.skipped_duplicate_execution",
        payload: {
          active_job_id: job_id,
          instagram_account_id: account.id,
          instagram_profile_id: profile.id
        }
      )
      return
    end

    action_log = profile.instagram_profile_action_logs.create!(
      instagram_account: account,
      action: "analyze_profile",
      status: "queued",
      trigger_source: "recurring_profile_recent_posts_scan",
      occurred_at: Time.current,
      active_job_id: job_id,
      queue_name: queue_name,
      metadata: { posts_limit: posts_limit_i, comments_limit: comments_limit_i }
    )
    action_log.mark_running!(extra_metadata: { active_job_id: job_id, queue_name: queue_name })

    # Story fetch is best-effort: transient network failures degrade to an
    # empty dataset (with a warning) instead of aborting the scan.
    story_result = fetch_story_dataset_with_fallback(account: account, profile: profile)
    story_dataset = story_result[:dataset]
    story_fetch_warning = story_result[:warning]
    update_story_activity!(profile: profile, story_dataset: story_dataset)
    # The policy may veto scanning (e.g. non-personal pages, excluded tags).
    policy_decision = Instagram::ProfileScanPolicy.new(profile: profile, profile_details: story_dataset[:profile]).decision
    if policy_decision[:skip_scan]
      handle_policy_skip!(
        account: account,
        profile: profile,
        action_log: action_log,
        decision: policy_decision,
        story_dataset: story_dataset,
        story_fetch_warning: story_fetch_warning
      )
      return
    end
    Instagram::ProfileScanPolicy.clear_scan_excluded!(profile: profile)

    # Snapshot shortcodes BEFORE collection so we can tell which posts are new.
    existing_shortcodes = profile.instagram_profile_posts.pluck(:shortcode).to_set
    collected = Instagram::ProfileAnalysisCollector.new(account: account, profile: profile).collect_and_persist!(
      posts_limit: posts_limit_i,
      comments_limit: comments_limit_i
    )
    persisted_posts = Array(collected[:posts])
    feed_fetch = collected.dig(:summary, :feed_fetch)
    new_posts = persisted_posts.reject { |post| existing_shortcodes.include?(post.shortcode) }
    recent_cutoff = MAX_POST_AGE_DAYS.days.ago
    new_recent_posts = new_posts.select { |post| post.taken_at.present? && post.taken_at >= recent_cutoff }
    analysis_enqueue_failures = 0

    new_recent_posts.each do |post|
      # Give previously failed analyses another chance.
      post.update!(ai_status: "pending") if post.ai_status == "failed"
      AnalyzeInstagramProfilePostJob.perform_later(
        instagram_account_id: account.id,
        instagram_profile_id: profile.id,
        instagram_profile_post_id: post.id,
        task_flags: {
          generate_comments: true,
          enforce_comment_evidence_policy: true,
          retry_on_incomplete_profile: true
        }
      )
    rescue StandardError => enqueue_error
      # Count and log, but keep enqueueing the remaining posts.
      analysis_enqueue_failures += 1
      Rails.logger.warn(
        "[SyncRecentProfilePostsForProfileJob] analyze enqueue failed for profile_post_id=#{post.id} " \
        "(profile_id=#{profile.id}): #{enqueue_error.class}: #{enqueue_error.message}"
      )
      next
    end

    apply_scan_tags!(profile: profile, has_new_posts: new_recent_posts.any?)
    profile.update!(last_synced_at: Time.current, ai_last_analyzed_at: Time.current)

    profile.record_event!(
      kind: "profile_recent_posts_scanned",
      external_id: "profile_recent_posts_scanned:#{Time.current.utc.iso8601(6)}",
      occurred_at: Time.current,
      metadata: {
        source: "recurring_profile_recent_posts_scan",
        stories_detected: Array(story_dataset[:stories]).length,
        latest_posts_fetched: persisted_posts.length,
        new_posts_enqueued_for_analysis: new_recent_posts.length,
        stale_posts_skipped_from_analysis: (new_posts.length - new_recent_posts.length),
        analysis_enqueue_failures: analysis_enqueue_failures,
        story_dataset_degraded: story_fetch_warning[:degraded],
        story_dataset_error_class: story_fetch_warning[:error_class],
        story_dataset_error_message: story_fetch_warning[:error_message]
      }
    )

    action_log.mark_succeeded!(
      extra_metadata: {
        stories_detected: Array(story_dataset[:stories]).length,
        fetched_posts: persisted_posts.length,
        new_posts: new_recent_posts.length,
        stale_posts_skipped_from_analysis: (new_posts.length - new_recent_posts.length),
        analysis_enqueue_failures: analysis_enqueue_failures,
        feed_fetch: feed_fetch.is_a?(Hash) ? feed_fetch : {},
        story_dataset_degraded: story_fetch_warning[:degraded],
        story_dataset_error_class: story_fetch_warning[:error_class],
        story_dataset_error_message: story_fetch_warning[:error_message]
      },
      log_text: "Scanned latest #{posts_limit_i} posts. New recent posts queued: #{new_recent_posts.length}, stale skipped: #{new_posts.length - new_recent_posts.length}, analysis enqueue failures: #{analysis_enqueue_failures}."
    )
  rescue StandardError => e
    # Re-raise a normalized error so the retry_on declarations above apply.
    normalized_error = normalize_job_error(e)
    action_log&.mark_failed!(
      error_message: normalized_error.message,
      extra_metadata: {
        active_job_id: job_id,
        executions: executions,
        error_class: normalized_error.class.name
      }
    )
    raise normalized_error
  ensure
    release_profile_scan_lock!(profile_id: profile.id) if lock_acquired
  end

  private

  # Fetches the profile's story dataset; on transient network errors returns
  # an empty dataset plus a warning hash instead of raising.
  def fetch_story_dataset_with_fallback(account:, profile:)
    dataset = Instagram::Client.new(account: account).fetch_profile_story_dataset!(
      username: profile.username,
      stories_limit: 3
    )
    {
      dataset: dataset,
      warning: { degraded: false, error_class: nil, error_message: nil }
    }
  rescue StandardError => e
    raise unless story_fetch_degradable_error?(e)

    Rails.logger.warn(
      "[SyncRecentProfilePostsForProfileJob] degraded story fetch for profile_id=#{profile.id} " \
      "(account_id=#{account.id}): #{e.class}: #{e.message}"
    )
    {
      dataset: {
        profile: {},
        user_id: nil,
        stories: [],
        fetched_at: Time.current
      },
      warning: {
        degraded: true,
        error_class: e.class.name,
        error_message: e.message.to_s
      }
    }
  end

  # True for network-level errors that justify degrading the story fetch.
  def story_fetch_degradable_error?(error)
    error.is_a?(Net::OpenTimeout) ||
      error.is_a?(Net::ReadTimeout) ||
      error.is_a?(Errno::ECONNREFUSED) ||
      error.is_a?(Errno::ECONNRESET) ||
      error.is_a?(Timeout::Error)
  end

  # Maps raw errors to the classes the retry_on declarations understand:
  # auth failures first, then rate-limit style transients.
  def normalize_job_error(error)
    authentication_error = normalize_authentication_error(error)
    return authentication_error if authentication_error

    normalize_retryable_error(error)
  end

  # Wraps RuntimeErrors whose message indicates missing/stale auth in
  # Instagram::AuthenticationRequiredError (backtrace preserved).
  def normalize_authentication_error(error)
    return error if error.is_a?(Instagram::AuthenticationRequiredError)
    return nil unless error.is_a?(RuntimeError)

    message = error.message.to_s.downcase
    auth_runtime_message =
      message.include?("stored cookies are not authenticated") ||
      message.include?("authentication required") ||
      message.include?("no stored cookies")
    return nil unless auth_runtime_message

    wrapped = Instagram::AuthenticationRequiredError.new(error.message.to_s)
    wrapped.set_backtrace(error.backtrace)
    wrapped
  end

  # Wraps rate-limit-looking RuntimeErrors in TransientProfileScanError so
  # they get retried; other errors pass through unchanged.
  def normalize_retryable_error(error)
    return error unless transient_runtime_error?(error)

    wrapped = TransientProfileScanError.new("Transient upstream response failure: #{error.message}")
    wrapped.set_backtrace(error.backtrace)
    wrapped
  end

  # Heuristic match on messages that indicate throttling/blocking upstream.
  def transient_runtime_error?(error)
    return false unless error.is_a?(RuntimeError)

    message = error.message.to_s.downcase
    message.include?("http 429") ||
      message.include?("too many requests") ||
      message.include?("rate limit") ||
      message.include?("temporarily blocked")
  end

  # Tries to take the per-profile advisory lock. Returns true on non-Postgres
  # adapters and on lock errors (fail open) — duplicate scans are preferable
  # to no scans.
  def claim_profile_scan_lock!(profile_id:)
    return true unless postgres_adapter?

    # Advisory lock keeps at most one scan worker active per profile id.
    key_a, key_b = profile_scan_lock_keys(profile_id: profile_id)
    value = ActiveRecord::Base.connection.select_value("SELECT pg_try_advisory_lock(#{key_a}, #{key_b})")
    ActiveModel::Type::Boolean.new.cast(value)
  rescue StandardError => e
    Rails.logger.warn("[SyncRecentProfilePostsForProfileJob] lock claim failed for profile_id=#{profile_id}: #{e.class}: #{e.message}")
    true
  end

  # Releases the advisory lock; failures are logged and swallowed.
  def release_profile_scan_lock!(profile_id:)
    return unless postgres_adapter?

    key_a, key_b = profile_scan_lock_keys(profile_id: profile_id)
    ActiveRecord::Base.connection.select_value("SELECT pg_advisory_unlock(#{key_a}, #{key_b})")
  rescue StandardError => e
    Rails.logger.warn("[SyncRecentProfilePostsForProfileJob] lock release failed for profile_id=#{profile_id}: #{e.class}: #{e.message}")
    nil
  end

  # Two-integer key pair for pg advisory locks: fixed namespace + profile id.
  def profile_scan_lock_keys(profile_id:)
    [ PROFILE_SCAN_LOCK_NAMESPACE, profile_id.to_i ]
  end

  # True when the active connection is Postgres (advisory locks available).
  def postgres_adapter?
    ActiveRecord::Base.connection.adapter_name.to_s.downcase.include?("postgres")
  rescue StandardError
    false
  end

  # Merges fetched profile details into the record (existing values kept as
  # fallbacks), records a story_seen event when stories were found, then
  # recomputes activity and saves.
  def update_story_activity!(profile:, story_dataset:)
    stories = Array(story_dataset[:stories])
    details = story_dataset[:profile].is_a?(Hash) ? story_dataset[:profile] : {}

    profile.display_name = details[:display_name].presence || profile.display_name
    profile.profile_pic_url = details[:profile_pic_url].presence || profile.profile_pic_url
    profile.ig_user_id = details[:ig_user_id].presence || profile.ig_user_id
    profile.bio = details[:bio].presence || profile.bio
    profile.followers_count = normalize_count(details[:followers_count]) || profile.followers_count
    profile.last_post_at = details[:last_post_at].presence || profile.last_post_at

    if stories.any?
      # NOTE(review): .compact after filter_map is redundant (filter_map
      # already drops nils) but harmless.
      latest_story_at = stories.filter_map { |story| story[:taken_at] }.compact.max || Time.current
      profile.last_story_seen_at = latest_story_at
      profile.record_event!(
        kind: "story_seen",
        external_id: "story_seen:profile_scan:#{profile.username}:#{latest_story_at.to_i}",
        occurred_at: latest_story_at,
        metadata: {
          source: "recurring_profile_recent_posts_scan",
          stories_detected: stories.length
        }
      )
    end

    profile.recompute_last_active!
    profile.save!
  end

  # Strictly parses a non-negative integer count; anything else becomes nil
  # so existing values are kept.
  def normalize_count(value)
    text = value.to_s.strip
    return nil unless text.match?(/\A\d+\z/)

    text.to_i
  rescue StandardError
    nil
  end

  # Records a skip event and marks the action log succeeded when the scan
  # policy vetoes scanning; some reasons also flag the profile as excluded
  # from future scans.
  def handle_policy_skip!(account:, profile:, action_log:, decision:, story_dataset:, story_fetch_warning:)
    reason_code = decision[:reason_code].to_s
    if reason_code == "non_personal_profile_page" || reason_code == "scan_excluded_tag"
      Instagram::ProfileScanPolicy.mark_scan_excluded!(profile: profile)
    end

    profile.update!(last_synced_at: Time.current)
    profile.record_event!(
      kind: "profile_recent_posts_scan_skipped",
      external_id: "profile_recent_posts_scan_skipped:#{Time.current.utc.iso8601(6)}",
      occurred_at: Time.current,
      metadata: {
        source: "recurring_profile_recent_posts_scan",
        reason_code: reason_code,
        reason: decision[:reason],
        followers_count: decision[:followers_count],
        max_followers: decision[:max_followers],
        stories_detected: Array(story_dataset[:stories]).length,
        story_dataset_degraded: story_fetch_warning[:degraded],
        story_dataset_error_class: story_fetch_warning[:error_class],
        story_dataset_error_message: story_fetch_warning[:error_message]
      }
    )

    action_log.mark_succeeded!(
      extra_metadata: {
        skipped: true,
        skip_reason_code: reason_code,
        skip_reason: decision[:reason],
        followers_count: decision[:followers_count],
        max_followers: decision[:max_followers],
        stories_detected: Array(story_dataset[:stories]).length,
        story_dataset_degraded: story_fetch_warning[:degraded],
        story_dataset_error_class: story_fetch_warning[:error_class],
        story_dataset_error_message: story_fetch_warning[:error_message]
      },
      log_text: "Skipped profile scan: #{decision[:reason]}"
    )
  end

  # Always tags the profile as scanned; additionally tags it as analyzed when
  # the scan produced new posts.
  def apply_scan_tags!(profile:, has_new_posts:)
    visited_tag = ProfileTag.find_or_create_by!(name: VISITED_TAG)
    profile.profile_tags << visited_tag unless profile.profile_tags.exists?(id: visited_tag.id)

    return unless has_new_posts

    analyzed_tag = ProfileTag.find_or_create_by!(name: ANALYZED_TAG)
    profile.profile_tags << analyzed_tag unless profile.profile_tags.exists?(id: analyzed_tag.id)
  end
end
-
# Checks whether the account can DM a given profile, persists the result on
# the profile, and notifies the UI via Turbo Streams.
class VerifyInstagramMessageabilityJob < ApplicationJob
  queue_as :profiles

  # instagram_account_id  - acting account (raises if missing)
  # instagram_profile_id  - profile to verify (raises if missing)
  # profile_action_log_id - optional existing action log to reuse
  def perform(instagram_account_id:, instagram_profile_id:, profile_action_log_id: nil)
    account = InstagramAccount.find(instagram_account_id)
    profile = account.instagram_profiles.find(instagram_profile_id)
    action_log = find_or_create_action_log(
      account: account,
      profile: profile,
      action: "verify_messageability",
      profile_action_log_id: profile_action_log_id
    )
    action_log.mark_running!(extra_metadata: { queue_name: queue_name, active_job_id: job_id })

    result = Instagram::Client.new(account: account).verify_messageability!(username: profile.username)
    profile.update!(
      can_message: result[:can_message],
      restriction_reason: result[:restriction_reason],
      dm_interaction_state: result[:dm_state].to_s.presence || (result[:can_message] ? "messageable" : "unavailable"),
      dm_interaction_reason: result[:dm_reason].to_s.presence || result[:restriction_reason].to_s,
      dm_interaction_checked_at: Time.current,
      dm_interaction_retry_after_at: result[:dm_retry_after_at]
    )

    Turbo::StreamsChannel.broadcast_append_to(
      account,
      target: "notifications",
      partial: "shared/notification",
      locals: { kind: "notice", message: "Messageability for #{profile.username}: #{result[:can_message] ? 'Yes' : 'No'}." }
    )
    action_log.mark_succeeded!(
      extra_metadata: result,
      log_text: "Messageability result: #{result[:can_message] ? 'Yes' : 'No'}"
    )
  rescue StandardError => e
    # BUGFIX: when the account lookup itself raised, `account` is still nil
    # here — broadcasting to a nil stream would raise and mask the original
    # error. Only broadcast when we actually have the account.
    if account
      Turbo::StreamsChannel.broadcast_append_to(
        account,
        target: "notifications",
        partial: "shared/notification",
        locals: { kind: "alert", message: "Messageability check failed: #{e.message}" }
      )
    end
    action_log&.mark_failed!(error_message: e.message, extra_metadata: { active_job_id: job_id })
    raise
  end

  private

  # Reuses the given action log when it exists; otherwise creates a fresh
  # queued log owned by this job.
  def find_or_create_action_log(account:, profile:, action:, profile_action_log_id:)
    log = profile.instagram_profile_action_logs.find_by(id: profile_action_log_id) if profile_action_log_id.present?
    return log if log

    profile.instagram_profile_action_logs.create!(
      instagram_account: account,
      action: action,
      status: "queued",
      trigger_source: "job",
      occurred_at: Time.current,
      active_job_id: job_id,
      queue_name: queue_name,
      metadata: { created_by: self.class.name }
    )
  end
end
-
class WorkspaceProcessActionsTodoPostJob < ApplicationJob
-
queue_as :ai
-
-
# Reason codes meaning "the profile isn't prepared enough to process yet".
# Shared from ProcessPostMetadataTaggingJob when that class is loaded;
# otherwise a local fallback list keeps this job self-sufficient.
PROFILE_INCOMPLETE_REASON_CODES =
  if defined?(ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES)
    ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES
  else
    %w[
      latest_posts_not_analyzed
      insufficient_analyzed_posts
      no_recent_posts_available
      missing_structured_post_signals
      profile_preparation_failed
      profile_preparation_error
    ].freeze
  end

# Tunables sourced from ENV, each clamped to a sane range.
# Max retries while waiting for profile preparation.
PROFILE_RETRY_MAX_ATTEMPTS = ENV.fetch("WORKSPACE_ACTIONS_PROFILE_RETRY_MAX_ATTEMPTS", 4).to_i.clamp(1, 12)
# Minutes to wait before re-checking post analysis.
POST_RETRY_WAIT_MINUTES = ENV.fetch("WORKSPACE_ACTIONS_POST_RETRY_WAIT_MINUTES", 20).to_i.clamp(5, 180)
# Minutes to wait before re-checking a pending media download.
MEDIA_RETRY_WAIT_MINUTES = ENV.fetch("WORKSPACE_ACTIONS_MEDIA_RETRY_WAIT_MINUTES", 10).to_i.clamp(2, 90)
# Minimum seconds between repeated enqueues for the same post.
ENQUEUE_COOLDOWN_SECONDS = ENV.fetch("WORKSPACE_ACTIONS_ENQUEUE_COOLDOWN_SECONDS", 180).to_i.clamp(15, 1800)
# Seconds a "running" marker blocks re-enqueueing before it is considered stale.
RUNNING_LOCK_SECONDS = ENV.fetch("WORKSPACE_ACTIONS_RUNNING_LOCK_SECONDS", 600).to_i.clamp(60, 3600)
-
-
# Enqueues this job for a post unless queue state persisted in the post's
# metadata says it is unnecessary (already has suggestions, retry already
# scheduled, currently running, or inside the enqueue cooldown). `force`
# bypasses those guards; `wait_until` (a Time) schedules a delayed run.
#
# Returns a result hash: { enqueued: true/false, reason: ..., ... }.
# Never raises — enqueue failures are reported via reason "enqueue_failed".
def self.enqueue_if_needed!(account:, profile:, post:, requested_by:, wait_until: nil, force: false)
  return { enqueued: false, reason: "post_missing" } unless account && profile && post

  now = Time.current
  forced = ActiveModel::Type::Boolean.new.cast(force)
  # Only honor wait_until when it is an actual Time instance.
  scheduled_at = wait_until.is_a?(Time) ? wait_until : nil

  # Persisted queue state is row-local; lock to prevent duplicate enqueue races.
  # NOTE(review): `return` from inside with_lock exits the surrounding
  # transaction block early — confirm the intended commit semantics on the
  # Rails version in use.
  post.with_lock do
    metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
    state = metadata["workspace_actions"].is_a?(Hash) ? metadata["workspace_actions"].deep_dup : {}
    suggestions = normalized_suggestions(post)
    return { enqueued: false, reason: "already_ready" } if suggestions.any? && !forced

    next_run_at = parse_time(state["next_run_at"])
    if next_run_at.present? && next_run_at > now && !forced && scheduled_at.nil?
      return { enqueued: false, reason: "retry_already_scheduled", next_run_at: next_run_at.iso8601 }
    end

    lock_until = parse_time(state["lock_until"])
    if lock_until.present? && lock_until > now && !forced
      return { enqueued: false, reason: "already_running", lock_until: lock_until.iso8601 }
    end

    last_enqueued_at = parse_time(state["last_enqueued_at"])
    if last_enqueued_at.present? && (now - last_enqueued_at) < ENQUEUE_COOLDOWN_SECONDS && !forced && scheduled_at.nil?
      return { enqueued: false, reason: "enqueue_cooldown_active" }
    end

    job =
      if scheduled_at.present?
        set(wait_until: scheduled_at).perform_later(
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          instagram_profile_post_id: post.id,
          requested_by: requested_by.to_s
        )
      else
        perform_later(
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          instagram_profile_post_id: post.id,
          requested_by: requested_by.to_s
        )
      end

    # Persist the new queue state back into the post's metadata.
    state["status"] = "queued"
    state["requested_by"] = requested_by.to_s.presence || "workspace"
    state["job_id"] = job.job_id
    state["queue_name"] = job.queue_name
    state["last_enqueued_at"] = now.iso8601(3)
    state["last_error"] = nil
    state["next_run_at"] = scheduled_at&.iso8601(3)
    state["updated_at"] = now.iso8601(3)
    state["source"] = name
    metadata["workspace_actions"] = state

    post.update!(metadata: metadata)

    {
      enqueued: true,
      reason: scheduled_at.present? ? "scheduled" : "queued",
      job_id: job.job_id,
      queue_name: job.queue_name,
      next_run_at: scheduled_at&.iso8601(3)
    }
  end
rescue StandardError => e
  {
    enqueued: false,
    reason: "enqueue_failed",
    error_class: e.class.name,
    error_message: e.message.to_s
  }
end
-
-
# Drives one workspace "generate comment suggestions" pass for a single post.
#
# The flow is strictly ordered: load records, bail on skip conditions
# (non-user post, page-profile policy, source-deleted post), mark the
# workspace state running, ensure prerequisites (media downloaded, post
# analysis finished) scheduling retries when they are missing, then generate
# suggestions and persist a terminal state ("ready" / "waiting_*" / "failed").
#
# @param requested_by [String] audit tag recorded into workspace metadata.
# Raises ActiveRecord::RecordNotFound when any of the three ids is stale.
def perform(instagram_account_id:, instagram_profile_id:, instagram_profile_post_id:, requested_by: "workspace")
  account = InstagramAccount.find(instagram_account_id)
  profile = account.instagram_profiles.find(instagram_profile_id)
  post = profile.instagram_profile_posts.find(instagram_profile_post_id)

  # Stories / non-user posts never get comment suggestions.
  unless user_created_post?(post)
    persist_workspace_state!(post: post, status: "skipped_non_user_post", requested_by: requested_by, next_run_at: nil)
    return
  end

  # Page-like profiles can be excluded from post analysis by policy.
  policy_decision = Instagram::ProfileScanPolicy.new(profile: profile).decision
  if ActiveModel::Type::Boolean.new.cast(policy_decision[:skip_post_analysis])
    persist_workspace_state!(
      post: post,
      status: "skipped_page_profile",
      requested_by: requested_by,
      last_error: policy_decision[:reason].to_s,
      next_run_at: nil
    )
    return
  end

  if post_deleted_from_source?(post)
    persist_workspace_state!(post: post, status: "skipped_deleted_source", requested_by: requested_by, next_run_at: nil)
    return
  end

  # From here on the workspace state shows "running" with a lock lease.
  mark_running!(post: post, requested_by: requested_by)
  ensure_video_preview_generation!(post: post)

  # Prerequisite 1: media must be attached before analysis can run.
  unless post.media.attached?
    queue_media_download!(account: account, profile: profile, post: post)
    schedule_retry!(
      account: account,
      profile: profile,
      post: post,
      requested_by: requested_by,
      wait_until: Time.current + MEDIA_RETRY_WAIT_MINUTES.minutes,
      status: "waiting_media_download",
      last_error: nil
    )
    return
  end

  # Prerequisite 2: do not pile on while an analysis pass is in flight.
  if post_analysis_running?(post)
    schedule_retry!(
      account: account,
      profile: profile,
      post: post,
      requested_by: requested_by,
      wait_until: Time.current + POST_RETRY_WAIT_MINUTES.minutes,
      status: "waiting_post_analysis",
      last_error: nil
    )
    return
  end

  # Prerequisite 3: the post must have a completed analysis.
  unless post_analyzed?(post)
    queue_post_analysis!(account: account, profile: profile, post: post)
    schedule_retry!(
      account: account,
      profile: profile,
      post: post,
      requested_by: requested_by,
      wait_until: Time.current + POST_RETRY_WAIT_MINUTES.minutes,
      status: "waiting_post_analysis",
      last_error: nil
    )
    return
  end

  # Fast path: suggestions may already exist from a previous run.
  suggestions = self.class.normalized_suggestions(post)
  if suggestions.any?
    persist_workspace_state!(
      post: post,
      status: "ready",
      requested_by: requested_by,
      suggestions_count: suggestions.length,
      next_run_at: nil
    )
    return
  end

  comment_result = Ai::PostCommentGenerationService.new(
    account: account,
    profile: profile,
    post: post,
    enforce_required_evidence: true
  ).run!

  # Re-read the post: the service writes suggestions into post.analysis.
  post.reload
  suggestions = self.class.normalized_suggestions(post)
  if suggestions.any?
    persist_workspace_state!(
      post: post,
      status: "ready",
      requested_by: requested_by,
      suggestions_count: suggestions.length,
      next_run_at: nil
    )
    return
  end

  # Generation was blocked for lack of profile history: schedule a history
  # build with this job registered as the resume hook.
  if retryable_profile_incomplete_block?(post: post, comment_result: comment_result)
    retry_result = schedule_build_history_retry!(
      account: account,
      profile: profile,
      post: post,
      requested_by: requested_by,
      history_reason_code: post.metadata.dig("comment_generation_policy", "history_reason_code").to_s
    )

    persist_workspace_state!(
      post: post,
      status: "waiting_build_history",
      requested_by: requested_by,
      next_run_at: parse_time(retry_result[:next_run_at]),
      last_error: retry_result[:queued] ? nil : retry_result[:reason].to_s
    )
    return
  end

  # Terminal failure: surface the policy's block reason when available.
  blocked_reason = post.metadata.dig("comment_generation_policy", "blocked_reason").to_s
  reason_code = post.metadata.dig("comment_generation_policy", "blocked_reason_code").to_s
  persist_workspace_state!(
    post: post,
    status: "failed",
    requested_by: requested_by,
    next_run_at: nil,
    last_error: blocked_reason.presence || reason_code.presence || "comment_generation_failed"
  )
rescue StandardError => e
  # Best-effort: record the failure in workspace state, then re-raise so
  # ActiveJob's own retry/failure handling still sees the error.
  post&.reload
  persist_workspace_state!(
    post: post,
    status: "failed",
    requested_by: requested_by,
    next_run_at: nil,
    last_error: "#{e.class}: #{e.message}"
  ) if post&.persisted?
  raise
end
-
-
private
-
-
# Leniently parses +value+ into a Time in the application time zone.
# Returns nil for blank input or anything Time.zone cannot parse.
def self.parse_time(value)
  text = value.to_s
  return nil if text.blank?

  begin
    Time.zone.parse(text)
  rescue StandardError
    nil
  end
end
-
-
# Instance-level convenience wrapper around the class-level lenient parser.
def parse_time(value) = self.class.parse_time(value)
-
-
# Extracts up to 8 distinct, non-blank comment suggestion strings from the
# post's analysis payload. Returns [] when the payload is missing, malformed,
# or raises while being read.
def self.normalized_suggestions(post)
  payload = post.analysis
  payload = {} unless payload.is_a?(Hash)

  cleaned = Array(payload["comment_suggestions"]).map { |raw| raw.to_s.strip }
  cleaned.reject(&:blank?).uniq.first(8)
rescue StandardError
  []
end
-
-
# Writes the current workspace state for a post under a row lock, clearing
# the running lease (lock_until) and stamping finished/updated timestamps.
#
# Deep-dups the metadata hashes so the persisted update is a clean write and
# never mutates the in-memory record's hash in place. All errors are
# swallowed deliberately: state bookkeeping must never mask or replace the
# job's primary outcome.
def persist_workspace_state!(post:, status:, requested_by:, next_run_at:, last_error: nil, suggestions_count: nil)
  post.with_lock do
    metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
    state = metadata["workspace_actions"].is_a?(Hash) ? metadata["workspace_actions"].deep_dup : {}

    state["status"] = status.to_s
    # Keep a previously recorded requester if the caller passed a blank one.
    state["requested_by"] = requested_by.to_s.presence || state["requested_by"].to_s.presence || "workspace"
    state["updated_at"] = Time.current.iso8601(3)
    state["finished_at"] = Time.current.iso8601(3)
    state["lock_until"] = nil
    state["last_error"] = last_error.to_s.presence
    state["next_run_at"] = next_run_at&.iso8601(3)
    state["suggestions_count"] = suggestions_count.to_i if suggestions_count.present?
    state["last_ready_at"] = Time.current.iso8601(3) if status.to_s == "ready"

    metadata["workspace_actions"] = state
    post.update!(metadata: metadata)
  end
rescue StandardError
  nil
end
-
-
# Flips the workspace state to "running" under a row lock and stamps a
# lock_until lease (RUNNING_LOCK_SECONDS ahead) so concurrent enqueue
# attempts can detect an in-flight run and back off.
def mark_running!(post:, requested_by:)
  post.with_lock do
    now = Time.current
    stamp = now.iso8601(3)

    meta = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
    workspace = meta["workspace_actions"]
    workspace = workspace.is_a?(Hash) ? workspace.deep_dup : {}

    workspace.merge!(
      "status" => "running",
      "requested_by" => requested_by.to_s.presence || "workspace",
      "started_at" => stamp,
      "updated_at" => stamp,
      "lock_until" => (now + RUNNING_LOCK_SECONDS.seconds).iso8601(3),
      "last_error" => nil
    )

    meta["workspace_actions"] = workspace
    post.update!(metadata: meta)
  end
end
-
-
# Re-enqueues this job (forced, scheduled at +wait_until+) and records the
# interim workspace status. Returns the enqueue result hash from
# enqueue_if_needed!.
def schedule_retry!(account:, profile:, post:, requested_by:, wait_until:, status:, last_error:)
  run_at = wait_until.is_a?(Time) ? wait_until : Time.current + POST_RETRY_WAIT_MINUTES.minutes

  outcome = self.class.enqueue_if_needed!(
    account: account,
    profile: profile,
    post: post,
    requested_by: "workspace_retry:#{requested_by}",
    wait_until: run_at,
    force: true
  )

  # Only surface an error when the retry could not actually be enqueued.
  recorded_error =
    if outcome[:enqueued]
      nil
    else
      last_error.presence || outcome[:reason].to_s
    end

  persist_workspace_state!(
    post: post,
    status: status,
    requested_by: requested_by,
    next_run_at: run_at,
    last_error: recorded_error
  )

  outcome
end
-
-
# Enqueues a media download for the post (with trigger_analysis disabled —
# this workspace flow schedules analysis itself) and records the job id and
# enqueue timestamp into workspace metadata under a row lock.
#
# NOTE(review): the early-return guard only fires when a download job id was
# recorded AND the media is already attached. If the intent was to skip
# re-enqueueing while a recorded download is still pending, `&&` may have
# been meant as `||` — confirm against the caller (perform only calls this
# when media is NOT attached, so this guard currently never triggers).
def queue_media_download!(account:, profile:, post:)
  metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
  workspace = metadata["workspace_actions"].is_a?(Hash) ? metadata["workspace_actions"] : {}
  pending_job = workspace["media_download_job_id"].to_s

  if pending_job.present? && post.media.attached?
    return { queued: false, reason: "already_downloaded" }
  end

  job = DownloadInstagramProfilePostMediaJob.perform_later(
    instagram_account_id: account.id,
    instagram_profile_id: profile.id,
    instagram_profile_post_id: post.id,
    trigger_analysis: false
  )

  # Record the enqueued job so later runs can see a download is in flight.
  post.with_lock do
    updated_metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
    state = updated_metadata["workspace_actions"].is_a?(Hash) ? updated_metadata["workspace_actions"].deep_dup : {}
    state["media_download_job_id"] = job.job_id
    state["media_download_queued_at"] = Time.current.iso8601(3)
    updated_metadata["workspace_actions"] = state
    post.update!(metadata: updated_metadata)
  end

  { queued: true, job_id: job.job_id }
rescue StandardError => e
  # Best-effort enqueue: report the failure in the result, never raise.
  { queued: false, reason: "media_download_enqueue_failed", error_class: e.class.name, error_message: e.message.to_s }
end
-
-
# Enqueues a full post-analysis job (visual/faces/OCR/video/metadata, but no
# comment generation — that is this job's responsibility) and records the
# job id and enqueue timestamp in workspace metadata.
#
# Cooldown guard: skips when an analysis was queued within the last 10
# minutes AND the pipeline still reports running, to avoid stacking passes.
def queue_post_analysis!(account:, profile:, post:)
  metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
  workspace = metadata["workspace_actions"].is_a?(Hash) ? metadata["workspace_actions"] : {}
  last_queued_at = parse_time(workspace["post_analysis_queued_at"])
  if last_queued_at.present? && last_queued_at > 10.minutes.ago && post_analysis_running?(post)
    return { queued: false, reason: "post_analysis_already_running" }
  end

  job = AnalyzeInstagramProfilePostJob.perform_later(
    instagram_account_id: account.id,
    instagram_profile_id: profile.id,
    instagram_profile_post_id: post.id,
    # Run every analysis task EXCEPT comment generation; comments are
    # produced by this workspace job once the analysis is ready.
    task_flags: {
      analyze_visual: true,
      analyze_faces: true,
      run_ocr: true,
      run_video: true,
      run_metadata: true,
      generate_comments: false,
      enforce_comment_evidence_policy: false,
      retry_on_incomplete_profile: false
    }
  )

  post.with_lock do
    updated_metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
    state = updated_metadata["workspace_actions"].is_a?(Hash) ? updated_metadata["workspace_actions"].deep_dup : {}
    state["post_analysis_job_id"] = job.job_id
    state["post_analysis_queued_at"] = Time.current.iso8601(3)
    updated_metadata["workspace_actions"] = state
    post.update!(metadata: updated_metadata)
  end

  { queued: true, job_id: job.job_id }
rescue StandardError => e
  # Best-effort enqueue: report the failure in the result, never raise.
  { queued: false, reason: "post_analysis_enqueue_failed", error_class: e.class.name, error_message: e.message.to_s }
end
-
-
# Registers a profile-history build with this job as the resume hook, so the
# comment generation retries automatically once history exists.
#
# Runs entirely inside post.with_lock; `next {...}` exits the lock block
# early and — since with_lock returns the block's value — becomes the
# method's return value. Attempts are capped at PROFILE_RETRY_MAX_ATTEMPTS,
# counted in workspace metadata.
def schedule_build_history_retry!(account:, profile:, post:, requested_by:, history_reason_code:)
  post.with_lock do
    metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
    state = metadata["workspace_actions"].is_a?(Hash) ? metadata["workspace_actions"].deep_dup : {}
    attempts = state["profile_retry_attempts"].to_i
    if attempts >= PROFILE_RETRY_MAX_ATTEMPTS
      next {
        queued: false,
        reason: "retry_attempts_exhausted",
        next_run_at: nil
      }
    end

    # Ask the history builder to run and, when done, re-enqueue this job
    # with the given kwargs (the resume_job payload).
    resume_result = BuildInstagramProfileHistoryJob.enqueue_with_resume_if_needed!(
      account: account,
      profile: profile,
      trigger_source: "workspace_actions_queue",
      requested_by: self.class.name,
      resume_job: {
        job_class: self.class,
        job_kwargs: {
          instagram_account_id: account.id,
          instagram_profile_id: profile.id,
          instagram_profile_post_id: post.id,
          requested_by: "workspace_history_retry:#{requested_by}"
        }
      }
    )
    unless ActiveModel::Type::Boolean.new.cast(resume_result[:accepted])
      next {
        queued: false,
        reason: resume_result[:reason].to_s.presence || "build_history_enqueue_failed",
        next_run_at: nil
      }
    end

    # Only count an attempt once the history build was actually accepted.
    state["profile_retry_attempts"] = attempts + 1
    state["profile_retry_reason_code"] = history_reason_code.to_s
    state["build_history_action_log_id"] = resume_result[:action_log_id].to_i if resume_result[:action_log_id].present?
    state["build_history_job_id"] = resume_result[:job_id].to_s.presence
    state["next_run_at"] = resume_result[:next_run_at].to_s.presence
    state["updated_at"] = Time.current.iso8601(3)
    metadata["workspace_actions"] = state
    post.update!(metadata: metadata)

    {
      queued: true,
      reason: "build_history_fallback_registered",
      next_run_at: resume_result[:next_run_at],
      action_log_id: resume_result[:action_log_id],
      job_id: resume_result[:job_id].to_s
    }
  end
rescue StandardError => e
  # Best-effort: report the failure in the result, never raise.
  {
    queued: false,
    reason: "retry_enqueue_failed",
    next_run_at: nil,
    error_class: e.class.name,
    error_message: e.message.to_s
  }
end
-
-
# True when comment generation was blocked specifically because required
# profile-history evidence is missing AND the policy's history_reason_code
# is one we know a history build can fix. Read errors return false.
def retryable_profile_incomplete_block?(post:, comment_result:)
  caster = ActiveModel::Type::Boolean.new
  return false unless caster.cast(comment_result[:blocked])
  return false unless comment_result[:reason_code].to_s == "missing_required_evidence"

  policy = post.metadata["comment_generation_policy"] if post.metadata.is_a?(Hash)
  return false unless policy.is_a?(Hash)
  # History already ready means a rebuild would not change the outcome.
  return false if caster.cast(policy["history_ready"])

  PROFILE_INCOMPLETE_REASON_CODES.include?(policy["history_reason_code"].to_s)
rescue StandardError
  false
end
-
-
# True when either the post's ai_status or the ai_pipeline metadata reports
# an analysis pass queued/running. Read errors count as "not running".
def post_analysis_running?(post)
  return true if %w[pending running].include?(post.ai_status.to_s)

  meta = post.metadata.is_a?(Hash) ? post.metadata : {}
  pipeline = meta["ai_pipeline"]
  pipeline = {} unless pipeline.is_a?(Hash)
  pipeline["status"].to_s == "running"
rescue StandardError
  false
end
-
-
# Analysis counts as complete only when the status says "analyzed" AND a
# completion timestamp is present.
def post_analyzed?(post)
  return false unless post.ai_status.to_s == "analyzed"

  post.analyzed_at.present?
end
-
-
# True when scan metadata has flagged this post as removed at the source.
def post_deleted_from_source?(post)
  meta = post.metadata
  flag = meta.is_a?(Hash) ? meta["deleted_from_source"] : nil
  ActiveModel::Type::Boolean.new.cast(flag)
end
-
-
# A post counts as user-created unless any metadata marker identifies it as
# a story: post_kind or product_type equal to "story", or a truthy is_story
# flag. Metadata read errors conservatively return false.
def user_created_post?(post)
  meta = post.metadata
  meta = {} unless meta.is_a?(Hash)

  marked_story = %w[post_kind product_type].any? { |key| meta[key].to_s.downcase == "story" }
  return false if marked_story

  !ActiveModel::Type::Boolean.new.cast(meta["is_story"])
rescue StandardError
  false
end
-
-
# Best-effort: enqueue preview-image generation for a video post that has
# media attached but no preview image yet. A 30-minute cache entry dedupes
# repeat enqueues; all failures are swallowed (previews are non-critical).
def ensure_video_preview_generation!(post:)
  media = post.media
  return unless media.attached?

  content_type = media.blob&.content_type.to_s
  return unless content_type.start_with?("video/")
  return if post.preview_image.attached?

  Rails.cache.fetch("workspace_actions:preview:#{post.id}", expires_in: 30.minutes) do
    GenerateProfilePostPreviewImageJob.perform_later(instagram_profile_post_id: post.id)
    true
  end
rescue StandardError
  nil
end
-
end
-
# Base mailer: all app mailers inherit the default sender address and the
# shared "mailer" layout.
class ApplicationMailer < ActionMailer::Base
  default from: "from@example.com"
  layout "mailer"
end
-
# Audit row recorded for every Active Storage attachment created by the app,
# capturing blob details plus which job / account / profile produced it.
class ActiveStorageIngestion < ApplicationRecord
  belongs_to :attachment, class_name: "ActiveStorage::Attachment", foreign_key: :active_storage_attachment_id
  belongs_to :blob, class_name: "ActiveStorage::Blob", foreign_key: :active_storage_blob_id
  belongs_to :instagram_account, optional: true
  belongs_to :instagram_profile, optional: true

  validates :active_storage_attachment_id, uniqueness: true
  validates :attachment_name, :blob_filename, :blob_byte_size, presence: true

  scope :recent_first, -> { order(created_at: :desc, id: :desc) }

  after_commit :broadcast_live_updates

  # Captures one ingestion row for +attachment+. Attribution comes from the
  # attachment's owning record when possible, else from Current.job_context.
  # Best-effort: any error is logged and swallowed (returns nil) so a
  # bookkeeping failure never breaks the attachment write itself.
  #
  # NOTE(review): the exists? check followed by create! is a check-then-act
  # race; uniqueness is enforced only by the model validation unless the DB
  # also has a unique index — confirm the schema.
  def self.record_from_attachment!(attachment:)
    return if exists?(active_storage_attachment_id: attachment.id)

    blob = attachment.blob
    context = extract_record_context(record: attachment.record)
    current_context = Current.job_context

    create!(
      active_storage_attachment_id: attachment.id,
      active_storage_blob_id: blob.id,
      attachment_name: attachment.name.to_s,
      record_type: attachment.record_type.to_s,
      record_id: attachment.record_id,
      blob_filename: blob.filename.to_s,
      blob_content_type: blob.content_type.to_s.presence,
      blob_byte_size: blob.byte_size.to_i,
      # Prefer attribution from the owning record; fall back to job context.
      instagram_account_id: context[:instagram_account_id] || current_context[:instagram_account_id],
      instagram_profile_id: context[:instagram_profile_id] || current_context[:instagram_profile_id],
      created_by_job_class: current_context[:job_class],
      created_by_active_job_id: current_context[:active_job_id],
      created_by_provider_job_id: current_context[:provider_job_id],
      queue_name: current_context[:queue_name],
      metadata: {
        service_name: blob.service_name,
        checksum: blob.checksum,
        content_type: blob.content_type,
        blob_created_at: blob.created_at&.iso8601
      }
    )
  rescue StandardError => e
    Rails.logger.warn("[storage.ingestion] capture failed: #{e.class}: #{e.message}")
    nil
  end

  # Duck-types the attachment's owning record to pull out account/profile
  # ids. Returns a (possibly partially nil) hash; {} on nil record or error.
  def self.extract_record_context(record:)
    return {} unless record

    account_id =
      if record.respond_to?(:instagram_account_id)
        record.instagram_account_id
      elsif record.respond_to?(:instagram_account) && record.instagram_account.respond_to?(:id)
        record.instagram_account.id
      end

    profile_id =
      if record.respond_to?(:instagram_profile_id)
        record.instagram_profile_id
      elsif record.respond_to?(:instagram_profile) && record.instagram_profile.respond_to?(:id)
        record.instagram_profile.id
      elsif record.is_a?(InstagramProfile)
        record.id
      end

    { instagram_account_id: account_id, instagram_profile_id: profile_id }
  rescue StandardError
    {}
  end

  private

  # Pushes a throttled ActionCable update so dashboards refresh live.
  def broadcast_live_updates
    Ops::LiveUpdateBroadcaster.broadcast!(
      topic: "storage_ingestions_changed",
      account_id: instagram_account_id,
      payload: { ingestion_id: id },
      throttle_key: "storage_ingestions_changed"
    )
  end
end
-
# One AI analysis run, scoped either to a profile or a post (see +purpose+).
# Prompt and raw response are stored encrypted; derived insight rows hang
# off this record.
class AiAnalysis < ApplicationRecord
  belongs_to :instagram_account
  belongs_to :analyzable, polymorphic: true
  # Self-referential cache lineage: an analysis may be a copy of an earlier
  # one with the same media fingerprint; copies are kept but unlinked when
  # the source is destroyed.
  belongs_to :cached_from_analysis, class_name: "AiAnalysis", foreign_key: :cached_from_ai_analysis_id, optional: true
  has_many :cached_copies, class_name: "AiAnalysis", foreign_key: :cached_from_ai_analysis_id, dependent: :nullify
  has_one :instagram_profile_insight, dependent: :destroy
  has_one :instagram_profile_message_strategy, dependent: :destroy
  has_many :instagram_profile_signal_evidences, dependent: :destroy
  has_one :instagram_post_insight, dependent: :destroy

  encrypts :prompt
  encrypts :response_text

  validates :purpose, presence: true, inclusion: { in: %w[profile post] }
  validates :provider, presence: true
  validates :status, presence: true

  scope :recent_first, -> { order(created_at: :desc, id: :desc) }
  scope :succeeded, -> { where(status: "succeeded") }
  # Cache lookup: newest successful analyses with a non-nil parsed payload
  # for the same purpose + media fingerprint.
  scope :reusable_for, ->(purpose:, media_fingerprint:) {
    succeeded
      .where(purpose: purpose, media_fingerprint: media_fingerprint)
      .where.not(analysis: nil)
      .recent_first
  }
end
-
# Ledger of individual calls made to AI providers, used for usage/cost
# reporting (see the +within+ scope for time-range queries).
class AiApiCall < ApplicationRecord
  belongs_to :instagram_account, optional: true

  CATEGORIES = %w[image_analysis video_analysis report_generation text_generation healthcheck other].freeze
  STATUSES = %w[succeeded failed].freeze

  validates :provider, presence: true
  validates :operation, presence: true
  validates :category, presence: true, inclusion: { in: CATEGORIES }
  validates :status, presence: true, inclusion: { in: STATUSES }
  validates :occurred_at, presence: true

  scope :recent_first, -> { order(occurred_at: :desc, id: :desc) }
  scope :within, ->(range) { where(occurred_at: range) }
end
-
# Per-provider AI configuration (one row per provider). The API key is
# stored encrypted; free-form settings live in the +config+ hash column.
class AiProviderSetting < ApplicationRecord
  SUPPORTED_PROVIDERS = %w[local].freeze

  encrypts :api_key

  validates :provider, presence: true, inclusion: { in: SUPPORTED_PROVIDERS }
  validates :provider, uniqueness: true
  validates :priority, numericality: { greater_than_or_equal_to: 0 }

  scope :enabled_first, -> { order(enabled: :desc, priority: :asc, provider: :asc) }

  # The config column normalized to a string-keyed Hash ({} when absent or
  # not a hash).
  def config_hash
    raw = config
    raw.is_a?(Hash) ? raw.stringify_keys : {}
  end

  # Reads a single config entry by (stringified) key.
  def config_value(key)
    config_hash.fetch(key.to_s, nil)
  end

  # Writes (or, for blank values, removes) a single config entry.
  def set_config_value(key, value)
    updated = config_hash
    name = key.to_s
    if value.present?
      updated[name] = value
    else
      updated.delete(name)
    end
    self.config = updated
  end

  # Human-readable provider label for the admin UI.
  def display_name
    return "Local AI Microservice" if provider == "local"

    provider.to_s.humanize
  end

  # The stored API key, or "" when unset/blank.
  def effective_api_key
    key = api_key.to_s
    key.present? ? key : ""
  end

  # The configured model name, or "" when unset/blank.
  def effective_model
    model = config_value("model").to_s
    model.present? ? model : ""
  end

  def api_key_present?
    effective_api_key.present?
  end
end
-
# Deduplicated operational issue (keyed by fingerprint) shown on the admin
# dashboard, optionally linked to the job failure that produced it.
#
# Improvement: mark_open!/mark_pending!/mark_resolved! previously triplicated
# the same update! shape; the shared logic now lives in transition_status!.
class AppIssue < ApplicationRecord
  STATUSES = %w[open pending resolved].freeze
  SEVERITIES = %w[info warn error critical].freeze

  belongs_to :instagram_account, optional: true
  belongs_to :instagram_profile, optional: true
  belongs_to :background_job_failure, optional: true

  validates :fingerprint, presence: true, uniqueness: true
  validates :issue_type, :source, :title, presence: true
  validates :status, inclusion: { in: STATUSES }
  validates :severity, inclusion: { in: SEVERITIES }
  validates :first_seen_at, :last_seen_at, presence: true

  scope :recent_first, -> { order(last_seen_at: :desc, id: :desc) }
  scope :active, -> { where.not(status: "resolved") }

  after_commit :broadcast_live_updates

  # An issue can be retried only when it is backed by a retryable job failure.
  def retryable?
    background_job_failure.present? && background_job_failure.retryable?
  end

  def mark_open!(notes: nil)
    transition_status!("open", notes: notes)
  end

  def mark_pending!(notes: nil)
    transition_status!("pending", notes: notes)
  end

  def mark_resolved!(notes: nil)
    transition_status!("resolved", notes: notes, resolved_at: Time.current)
  end

  private

  # Shared status transition: non-resolved states clear resolved_at, and
  # existing resolution notes are preserved when no new notes are supplied.
  def transition_status!(status, notes:, resolved_at: nil)
    update!(
      status: status,
      resolved_at: resolved_at,
      resolution_notes: notes.presence || resolution_notes
    )
  end

  # Throttled ActionCable fan-out so issue lists and dashboard metrics
  # refresh live.
  def broadcast_live_updates
    Ops::LiveUpdateBroadcaster.broadcast!(
      topic: "issues_changed",
      account_id: instagram_account_id,
      payload: { issue_id: id, status: status },
      throttle_key: "issues_changed"
    )
    Ops::LiveUpdateBroadcaster.broadcast!(
      topic: "dashboard_metrics_changed",
      account_id: instagram_account_id,
      payload: { source: "app_issue" },
      throttle_key: "dashboard_metrics_changed"
    )
  end
end
-
1
# Abstract base class for every model in the app.
class ApplicationRecord < ActiveRecord::Base
  primary_abstract_class
end
-
# Persistent record of a failed background job run, classified by
# failure_kind (authentication / transient / runtime).
class BackgroundJobFailure < ApplicationRecord
  FAILURE_KINDS = %w[authentication transient runtime].freeze

  belongs_to :instagram_account, optional: true
  belongs_to :instagram_profile, optional: true
  has_many :app_issues, dependent: :nullify

  validates :active_job_id, presence: true
  validates :job_class, presence: true
  validates :error_class, presence: true
  validates :error_message, presence: true
  validates :occurred_at, presence: true
  validates :failure_kind, inclusion: { in: FAILURE_KINDS }

  scope :recent_first, -> { order(occurred_at: :desc, id: :desc) }

  after_commit :broadcast_live_updates

  def auth_failure?
    failure_kind == "authentication"
  end

  # Retry is only offered for failures flagged retryable that are not
  # authentication problems (retrying those cannot succeed).
  def retryable_now?
    retryable? && !auth_failure?
  end

  # Reads the raw column and requires an explicit true (nil is not retryable).
  def retryable?
    self[:retryable] == true
  end

  private

  # Throttled ActionCable fan-out so failure lists and job dashboards
  # refresh live.
  def broadcast_live_updates
    Ops::LiveUpdateBroadcaster.broadcast!(
      topic: "job_failures_changed",
      account_id: instagram_account_id,
      payload: { failure_id: id, failure_kind: failure_kind },
      throttle_key: "job_failures_changed"
    )
    Ops::LiveUpdateBroadcaster.broadcast!(
      topic: "jobs_changed",
      account_id: instagram_account_id,
      payload: { source: "background_job_failure", failure_id: id },
      throttle_key: "jobs_changed"
    )
  end
end
-
1
# Concern that records an ActiveStorageIngestion audit row after each
# attachment is committed.
#
# NOTE(review): `self` is passed as the attachment, so this is presumably
# included into ActiveStorage::Attachment — confirm at the include site.
module ActiveStorageIngestionTracking
  extend ActiveSupport::Concern

  included do
    after_create_commit :capture_storage_ingestion_row
  end

  private

  # Best-effort capture; record_from_attachment! logs and swallows its own
  # errors, so this callback cannot fail the attachment write.
  def capture_storage_ingestion_row
    ActiveStorageIngestion.record_from_attachment!(attachment: self)
  end
end
-
# Minimal record of a conversation counterpart (by username) for an account.
class ConversationPeer < ApplicationRecord
  belongs_to :instagram_account

  validates :username, presence: true
end
-
1
# Per-execution attribute store (ActiveSupport::CurrentAttributes resets it
# around each request/job). Jobs populate these so downstream callbacks —
# e.g. ActiveStorageIngestion.record_from_attachment! — can attribute the
# records they create.
class Current < ActiveSupport::CurrentAttributes
  attribute :active_job_id,
            :provider_job_id,
            :job_class,
            :queue_name,
            :instagram_account_id,
            :instagram_profile_id

  # All job-related attributes as one snapshot hash (values may be nil when
  # running outside a job).
  def job_context
    {
      active_job_id: active_job_id,
      provider_job_id: provider_job_id,
      job_class: job_class,
      queue_name: queue_name,
      instagram_account_id: instagram_account_id,
      instagram_profile_id: instagram_profile_id
    }
  end
end
-
1
# A connected Instagram account: owns all scraped/derived data and carries
# the encrypted browser-session state (cookies + web storage) used to act as
# the user.
#
# Improvement: +cookies+ previously duplicated parse_json_array verbatim; it
# now delegates to the shared helper (identical behavior: [] on blank or
# unparsable JSON).
class InstagramAccount < ApplicationRecord
  CONTINUOUS_PROCESSING_STATES = %w[idle running paused].freeze

  has_many :recipients, dependent: :destroy
  has_many :conversation_peers, dependent: :destroy
  has_many :instagram_profiles, dependent: :destroy
  has_many :instagram_messages, dependent: :destroy
  has_many :sync_runs, dependent: :destroy
  has_many :instagram_profile_analyses, through: :instagram_profiles
  has_many :instagram_posts, dependent: :destroy
  has_many :instagram_profile_posts, dependent: :destroy
  has_many :ai_analyses, dependent: :destroy
  has_many :ai_api_calls, dependent: :destroy
  has_many :instagram_profile_action_logs, dependent: :destroy
  has_many :instagram_profile_insights, dependent: :destroy
  has_many :instagram_profile_message_strategies, dependent: :destroy
  has_many :instagram_profile_signal_evidences, dependent: :destroy
  has_many :instagram_post_insights, dependent: :destroy
  has_many :instagram_post_entities, dependent: :destroy
  has_many :instagram_profile_history_chunks, dependent: :destroy
  has_many :instagram_stories, dependent: :destroy
  has_many :instagram_story_people, dependent: :destroy
  has_many :app_issues, dependent: :nullify
  has_many :active_storage_ingestions, dependent: :nullify

  # Only enable attribute encryption when a complete key set is configured;
  # otherwise the columns fall back to plaintext (e.g. bare dev setups).
  encryption = Rails.application.config.active_record.encryption
  if encryption.primary_key.present? &&
     encryption.deterministic_key.present? &&
     encryption.key_derivation_salt.present?
    encrypts :cookies_json
    encrypts :local_storage_json
    encrypts :session_storage_json
    encrypts :auth_snapshot_json
  end

  validates :username, presence: true
  validates :continuous_processing_state, inclusion: { in: CONTINUOUS_PROCESSING_STATES }, allow_nil: true

  scope :continuous_processing_enabled, -> { where(continuous_processing_enabled: true) }

  # True while a retry-after timestamp is set in the future.
  def continuous_processing_backoff_active?
    continuous_processing_retry_after_at.present? && continuous_processing_retry_after_at > Time.current
  end

  # Browser cookies as an array of hashes; [] when blank or unparsable.
  def cookies
    parse_json_array(cookies_json)
  end

  def cookies=(raw_cookies)
    self.cookies_json = Array(raw_cookies).to_json
  end

  def local_storage
    parse_json_array(local_storage_json)
  end

  def local_storage=(entries)
    self.local_storage_json = Array(entries).to_json
  end

  def session_storage
    parse_json_array(session_storage_json)
  end

  def session_storage=(entries)
    self.session_storage_json = Array(entries).to_json
  end

  # Arbitrary auth metadata hash; {} when blank or unparsable.
  def auth_snapshot
    return {} if auth_snapshot_json.blank?

    JSON.parse(auth_snapshot_json)
  rescue JSON::ParserError
    {}
  end

  def auth_snapshot=(value)
    self.auth_snapshot_json = value.to_h.to_json
  end

  # Everything a browser automation session needs to impersonate the user.
  def session_bundle
    {
      cookies: cookies,
      local_storage: local_storage,
      session_storage: session_storage,
      user_agent: user_agent,
      auth_snapshot: auth_snapshot
    }
  end

  # Bulk-assigns all session artifacts from a captured bundle.
  def session_bundle=(bundle)
    payload = bundle.to_h.deep_symbolize_keys
    self.cookies = payload[:cookies]
    self.local_storage = payload[:local_storage]
    self.session_storage = payload[:session_storage]
    self.user_agent = payload[:user_agent].presence
    self.auth_snapshot = payload[:auth_snapshot] || {}
  end

  def sessionid_cookie_present?
    cookie_named_present?("sessionid")
  end

  def csrftoken_cookie_present?
    cookie_named_present?("csrftoken")
  end

  # Authenticated = login_state says so AND we still hold a session cookie.
  def cookie_authenticated?
    login_state.to_s == "authenticated" && sessionid_cookie_present?
  end

  private

  # Lenient JSON array read: [] for blank input or parse errors.
  def parse_json_array(value)
    return [] if value.blank?

    JSON.parse(value)
  rescue JSON::ParserError
    []
  end

  # True when a cookie with the given name exists and has a non-blank value.
  def cookie_named_present?(name)
    target = name.to_s
    cookies.any? do |cookie|
      next false unless cookie.is_a?(Hash)

      cookie["name"].to_s == target && cookie["value"].to_s.present?
    end
  end
end
-
# A DM tracked for an account/profile pair, moving through the delivery
# states queued -> sent | failed.
class InstagramMessage < ApplicationRecord
  belongs_to :instagram_account
  belongs_to :instagram_profile

  validates :body, presence: true

  scope :recent_first, -> { order(created_at: :desc) }

  # Generate one predicate per delivery state (queued?, sent?, failed?),
  # each true when +status+ equals that state.
  %w[queued sent failed].each do |state|
    define_method("#{state}?") { status == state }
  end
end
-
-
# A post captured from the account's own feed/timeline view (distinct from
# InstagramProfilePost, which belongs to a scanned profile).
class InstagramPost < ApplicationRecord
  belongs_to :instagram_account
  belongs_to :instagram_profile, optional: true

  has_one_attached :media
  has_many :ai_analyses, as: :analyzable, dependent: :destroy
  has_many :instagram_post_insights, dependent: :destroy
  has_many :instagram_post_entities, dependent: :destroy

  validates :shortcode, presence: true
  validates :detected_at, presence: true
  validates :status, presence: true

  scope :recent_first, -> { order(detected_at: :desc, id: :desc) }
  after_commit :broadcast_posts_table_refresh

  # Canonical public URL for the post, built from its shortcode.
  def permalink
    "#{Instagram::Client::INSTAGRAM_BASE_URL}/p/#{shortcode}/"
  end

  private

  # Throttled ActionCable update so the posts table refreshes live.
  def broadcast_posts_table_refresh
    Ops::LiveUpdateBroadcaster.broadcast!(
      topic: "posts_table_changed",
      account_id: instagram_account_id,
      payload: { post_id: id },
      throttle_key: "posts_table_changed"
    )
  end
end
-
# A single typed entity (entity_type/value pair) extracted from a post's
# insight analysis.
class InstagramPostEntity < ApplicationRecord
  belongs_to :instagram_account
  belongs_to :instagram_post
  belongs_to :instagram_post_insight

  validates :entity_type, presence: true
  validates :value, presence: true

  scope :recent_first, -> { order(created_at: :desc, id: :desc) }
end
-
# Links a profile post to a person seen in it, with a role distinguishing
# the profile owner from others; optionally tied to a known story person.
class InstagramPostFace < ApplicationRecord
  ROLES = %w[primary_user secondary_person unknown].freeze

  belongs_to :instagram_profile_post
  belongs_to :instagram_story_person, optional: true

  validates :role, presence: true, inclusion: { in: ROLES }
end
-
# Structured insight derived from one AiAnalysis of a post; parent of the
# extracted entity rows.
class InstagramPostInsight < ApplicationRecord
  belongs_to :instagram_account
  belongs_to :instagram_post
  belongs_to :ai_analysis

  has_many :instagram_post_entities, dependent: :destroy

  scope :recent_first, -> { order(created_at: :desc, id: :desc) }
end
-
1
class InstagramProfile < ApplicationRecord
-
1
belongs_to :instagram_account
-
1
has_many :instagram_messages, dependent: :destroy
-
1
has_many :instagram_profile_events, dependent: :destroy
-
1
has_many :instagram_profile_analyses, dependent: :destroy
-
1
has_many :instagram_profile_action_logs, dependent: :destroy
-
1
has_many :instagram_profile_posts, dependent: :destroy
-
1
has_many :instagram_post_faces, through: :instagram_profile_posts
-
1
has_many :instagram_profile_post_comments, dependent: :destroy
-
1
has_many :instagram_profile_insights, dependent: :destroy
-
1
has_many :instagram_profile_message_strategies, dependent: :destroy
-
1
has_many :instagram_profile_signal_evidences, dependent: :destroy
-
1
has_many :instagram_profile_history_chunks, dependent: :destroy
-
1
has_many :instagram_stories, dependent: :destroy
-
1
has_many :instagram_story_people, dependent: :destroy
-
1
has_many :ai_analyses, as: :analyzable, dependent: :destroy
-
1
has_many :instagram_profile_taggings, dependent: :destroy
-
1
has_many :profile_tags, through: :instagram_profile_taggings
-
1
has_many :app_issues, dependent: :nullify
-
1
has_many :active_storage_ingestions, dependent: :nullify
-
1
has_one :instagram_profile_behavior_profile, dependent: :destroy
-
-
1
has_one_attached :avatar
-
-
1
validates :username, presence: true
-
1
after_commit :broadcast_profiles_table_refresh
-
-
1
def mutual?
-
following && follows_you
-
end
-
-
1
def display_label
-
display_name.presence || username
-
end
-
-
1
def recompute_last_active!
-
self.last_active_at = [ last_story_seen_at, last_post_at ].compact.max
-
end
-
-
1
def story_reply_allowed?
-
story_interaction_state.to_s == "reply_available"
-
end
-
-
1
def story_reply_retry_pending?
-
story_interaction_state.to_s == "unavailable" &&
-
story_interaction_retry_after_at.present? &&
-
story_interaction_retry_after_at > Time.current
-
end
-
-
1
def dm_allowed?
-
dm_interaction_state.to_s == "messageable" || can_message == true
-
end
-
-
1
def dm_retry_pending?
-
dm_interaction_state.to_s == "unavailable" &&
-
dm_interaction_retry_after_at.present? &&
-
dm_interaction_retry_after_at > Time.current
-
end
-
-
1
def auto_reply_enabled?
-
profile_tags.where(name: %w[automatic_reply automatic\ reply auto_reply auto\ reply]).exists?
-
end
-
-
1
def record_event!(kind:, external_id:, occurred_at: nil, metadata: {})
-
eid = external_id.to_s.strip
-
then: 0
else: 0
raise ArgumentError, "external_id is required for profile events" if eid.blank?
-
-
event = instagram_profile_events.find_or_initialize_by(kind: kind.to_s, external_id: eid)
-
event.detected_at = Time.current
-
then: 0
else: 0
event.occurred_at = occurred_at if occurred_at.present?
-
event.metadata = (event.metadata || {}).merge(metadata.to_h)
-
event.save!
-
event
-
end
-
-
1
def latest_analysis
-
ai_analyses.where(purpose: "profile").recent_first.first ||
-
instagram_profile_analyses.recent_first.first
-
end
-
-
1
# Joins the most recent history chunks into a single narrative, oldest first
# so the text reads chronologically. max_chunks is clamped to 1..12.
def history_narrative_text(max_chunks: 3)
  chunk_limit = max_chunks.to_i.clamp(1, 12)
  oldest_first = instagram_profile_history_chunks.recent_first.limit(chunk_limit).to_a.reverse
  oldest_first.filter_map do |chunk|
    text = chunk.content.to_s.strip
    text unless text.empty?
  end.join("\n")
end
-
-
1
# Serializes the most recent history chunks (newest first) as plain hashes
# for prompt/context building. max_chunks is clamped to 1..24.
def history_narrative_chunks(max_chunks: 6)
  instagram_profile_history_chunks.recent_first.limit(max_chunks.to_i.clamp(1, 24)).map do |chunk|
    {
      sequence: chunk.sequence,
      starts_at: chunk.starts_at&.iso8601,
      ends_at: chunk.ends_at&.iso8601,
      word_count: chunk.word_count,
      entry_count: chunk.entry_count,
      content: chunk.content.to_s
    }
  end
end
-
-
1
private
-
-
1
# after_commit hook: tells the live-update broadcaster that the profiles
# table changed for this account. Throttled by a shared key so bursts of
# profile writes collapse into few broadcasts.
def broadcast_profiles_table_refresh
  Ops::LiveUpdateBroadcaster.broadcast!(
    topic: "profiles_table_changed",
    account_id: instagram_account_id,
    payload: { profile_id: id },
    throttle_key: "profiles_table_changed"
  )
end
-
end
-
1
# Audit trail of automated actions run against a profile (fetches, syncs,
# analyses, replies...). Each row moves through queued -> running ->
# succeeded/failed via the mark_* helpers below.
class InstagramProfileActionLog < ApplicationRecord
  # Whitelist of recordable action names; `action` must be one of these.
  ACTIONS = %w[
    fetch_profile_details
    verify_messageability
    analyze_profile
    analyze_profile_posts
    capture_profile_posts
    build_history
    sync_avatar
    sync_stories
    sync_stories_debug
    auto_story_reply
    post_comment
  ].freeze

  # Lifecycle states a log row may be in.
  STATUSES = %w[queued running succeeded failed].freeze

  belongs_to :instagram_account
  belongs_to :instagram_profile

  # Raw log output may contain sensitive data; encrypted at rest.
  encrypts :log_text

  # Keep the account's audit-log panel fresh after any committed change.
  after_commit :broadcast_account_audit_logs_refresh

  validates :action, presence: true, inclusion: { in: ACTIONS }
  validates :status, presence: true, inclusion: { in: STATUSES }
  validates :occurred_at, presence: true

  scope :recent_first, -> { order(occurred_at: :desc, id: :desc) }

  # Transition to "running". Preserves an existing started_at (re-runs keep
  # the original start) and clears any previous error.
  def mark_running!(extra_metadata: nil)
    update!(
      status: "running",
      started_at: started_at || Time.current,
      metadata: merge_metadata(extra_metadata),
      error_message: nil
    )
  end

  # Transition to "succeeded". Keeps existing log_text unless a new one is
  # supplied; clears any previous error.
  def mark_succeeded!(extra_metadata: nil, log_text: nil)
    update!(
      status: "succeeded",
      finished_at: Time.current,
      metadata: merge_metadata(extra_metadata),
      log_text: log_text.presence || self.log_text,
      error_message: nil
    )
  end

  # Transition to "failed", recording the error message.
  def mark_failed!(error_message:, extra_metadata: nil)
    update!(
      status: "failed",
      finished_at: Time.current,
      metadata: merge_metadata(extra_metadata),
      error_message: error_message.to_s
    )
  end

  private

  # Best-effort enqueue of an audit-log refresh for the owning account;
  # deliberately swallows errors so logging never breaks the main action.
  def broadcast_account_audit_logs_refresh
    account = instagram_account
    return unless account

    RefreshAccountAuditLogsJob.enqueue_for(instagram_account_id: account.id, limit: 120)
  rescue StandardError
    nil
  end

  # Shallow-merges extra metadata over the stored hash, tolerating a nil or
  # non-Hash metadata column.
  def merge_metadata(extra)
    base = metadata.is_a?(Hash) ? metadata : {}
    return base if extra.blank?

    base.merge(extra.to_h)
  end
end
-
# Legacy per-profile analysis record (see InstagramProfile#latest_analysis,
# which falls back to this when no ai_analyses row exists).
class InstagramProfileAnalysis < ApplicationRecord
  belongs_to :instagram_profile

  # These contain potentially sensitive derived notes; keep them encrypted at rest.
  encrypts :prompt
  encrypts :response_text

  validates :provider, presence: true
  validates :status, presence: true

  scope :recent_first, -> { order(created_at: :desc, id: :desc) }
end
-
-
# One-per-profile store of derived behavioral data (metadata /
# behavioral_summary are read elsewhere, e.g. by comment generation).
class InstagramProfileBehaviorProfile < ApplicationRecord
  belongs_to :instagram_profile
end
-
require "digest"
-
-
class InstagramProfileEvent < ApplicationRecord
-
# Mixins: CV/OCR story intelligence, ActionCable/Turbo broadcasting helpers,
# and LLM comment-generation coordination.
include InstagramProfileEvent::LocalStoryIntelligence
include InstagramProfileEvent::Broadcastable
include InstagramProfileEvent::CommentGenerationCoordinator

belongs_to :instagram_profile

has_one_attached :media
has_one_attached :preview_image
has_many :instagram_stories, foreign_key: :source_event_id, dependent: :nullify

validates :kind, presence: true
validates :external_id, presence: true
validates :detected_at, presence: true

# LLM Comment validations
validates :llm_comment_provider, inclusion: { in: %w[ollama local], allow_nil: true }
validates :llm_comment_status, inclusion: { in: %w[not_requested queued running completed failed skipped], allow_nil: true }
validate :llm_comment_consistency, on: :update

# Post-commit fan-out: audit logs, story archive, history narrative (create
# only), and the profile events table.
after_commit :broadcast_account_audit_logs_refresh
after_commit :broadcast_story_archive_refresh, on: %i[create update]
after_commit :append_profile_history_narrative, on: :create
after_commit :broadcast_profile_events_refresh

# Event kinds that represent a downloaded story belonging in the archive.
STORY_ARCHIVE_EVENT_KINDS = %w[
  story_downloaded
  story_image_downloaded_via_feed
  story_media_downloaded_via_feed
].freeze

LLM_SUCCESS_STATUSES = %w[ok].freeze
-
-
-
-
-
-
-
-
-
-
# True when this event's kind marks it as a downloaded story that belongs
# in the story archive.
def story_archive_item?
  STORY_ARCHIVE_EVENT_KINDS.member?(kind.to_s)
end
-
-
# Builds the full diagnostic snapshot for one comment-generation run:
# timing, media info, CV/OCR intelligence, analysis verdicts, profile
# context, and the exact prompt-engineering inputs. Every context value is
# defensively coerced (non-Hash -> {}, arrays truncated) so a partially
# populated context still serializes cleanly.
def capture_technical_details(context)
  profile = instagram_profile
  media_blob = media.attached? ? media.blob : nil
  timeline = story_timeline_data
  local_intelligence = context[:local_story_intelligence].is_a?(Hash) ? context[:local_story_intelligence] : {}
  verified_story_facts = context[:verified_story_facts].is_a?(Hash) ? context[:verified_story_facts] : {}
  story_ownership_classification = context[:story_ownership_classification].is_a?(Hash) ? context[:story_ownership_classification] : {}
  generation_policy = context[:generation_policy].is_a?(Hash) ? context[:generation_policy] : {}
  validated_story_insights = context[:validated_story_insights].is_a?(Hash) ? context[:validated_story_insights] : {}
  profile_preparation = context[:profile_preparation].is_a?(Hash) ? context[:profile_preparation] : {}
  verified_profile_history = Array(context[:verified_profile_history]).first(12)
  conversational_voice = context[:conversational_voice].is_a?(Hash) ? context[:conversational_voice] : {}

  {
    timestamp: Time.current.iso8601,
    event_id: id,
    story_id: metadata.is_a?(Hash) ? metadata["story_id"] : nil,
    timeline: timeline,
    # Attachment details; empty hash when no media is attached.
    media_info: media_blob ? {
      content_type: media_blob.content_type,
      size_bytes: media_blob.byte_size,
      dimensions: metadata.is_a?(Hash) ? metadata.slice("media_width", "media_height") : {},
      url: Rails.application.routes.url_helpers.rails_blob_path(media, only_path: true)
    } : {},
    local_story_intelligence: local_intelligence,
    # Analysis verdicts plus a count-based summary of what was extracted.
    analysis: {
      verified_story_facts: verified_story_facts,
      ownership_classification: story_ownership_classification,
      generation_policy: generation_policy,
      validated_story_insights: validated_story_insights,
      cv_ocr_evidence: context[:cv_ocr_evidence],
      historical_comparison: context[:historical_comparison],
      extraction_summary: {
        has_ocr_text: verified_story_facts[:ocr_text].to_s.present?,
        has_transcript: verified_story_facts[:transcript].to_s.present?,
        objects_count: Array(verified_story_facts[:objects]).size,
        object_detections_count: Array(verified_story_facts[:object_detections]).size,
        scenes_count: Array(verified_story_facts[:scenes]).size,
        hashtags_count: Array(verified_story_facts[:hashtags]).size,
        mentions_count: Array(verified_story_facts[:mentions]).size,
        detected_usernames_count: Array(verified_story_facts[:detected_usernames]).size,
        faces_count: verified_story_facts[:face_count].to_i,
        signal_score: verified_story_facts[:signal_score].to_i,
        source: verified_story_facts[:source].to_s,
        reason: verified_story_facts[:reason].to_s.presence
      }
    },
    # What we know about the profile being commented on.
    profile_analysis: {
      username: profile&.username,
      display_name: profile&.display_name,
      bio: profile&.bio,
      bio_length: profile&.bio&.length || 0,
      detected_author_type: determine_author_type(profile),
      extracted_topics: extract_topics_from_profile(profile),
      profile_comment_preparation: profile_preparation,
      conversational_voice: conversational_voice,
      verified_profile_history: verified_profile_history
    },
    # The exact inputs that shaped the prompt (intentionally overlaps with
    # `analysis` so this section is self-contained when inspected alone).
    prompt_engineering: {
      final_prompt: context[:post_payload],
      image_description: context[:image_description],
      topics_used: context[:topics],
      author_classification: context[:author_type],
      historical_context: context[:historical_context],
      historical_story_context: Array(context[:historical_story_context]).first(10),
      historical_comparison: context[:historical_comparison],
      verified_story_facts: verified_story_facts,
      ownership_classification: story_ownership_classification,
      generation_policy: generation_policy,
      cv_ocr_evidence: context[:cv_ocr_evidence],
      profile_comment_preparation: profile_preparation,
      conversational_voice: conversational_voice,
      verified_profile_history: verified_profile_history,
      rules_applied: context[:post_payload]&.dig(:rules)
    }
  }
end
-
-
-
-
-
-
-
-
-
private
-
-
-
# after_commit(create) hook: enqueue a background job to fold this event
# into the profile's history narrative. Best-effort — enqueue failures are
# swallowed so event creation never rolls back over narrative upkeep.
def append_profile_history_narrative
  AppendProfileHistoryNarrativeJob.perform_later(
    instagram_profile_event_id: id,
    mode: "event"
  )
rescue StandardError
  nil
end
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
# Matches detected faces (max 5) to known persons via embedding similarity.
# For each face: embed its crop (falling back to the full frame bytes),
# match-or-create a person record, and update that person's aggregated face
# attributes. Returns an array of match hashes; [] when prerequisites are
# missing or anything raises (best-effort by design).
def resolve_people_from_faces(detected_faces:, fallback_image_bytes:, story_id:)
  account = instagram_profile&.instagram_account
  profile = instagram_profile
  return [] unless account && profile

  embedding_service = FaceEmbeddingService.new
  matcher = VectorMatchingService.new
  Array(detected_faces).first(5).filter_map do |face|
    candidate_image_bytes = face[:image_bytes].presence || fallback_image_bytes
    next if candidate_image_bytes.blank?
    # Signature dedupes repeat observations of the same face in one story.
    observation_signature = event_face_observation_signature(story_id: story_id, face: face)

    vector_payload = embedding_service.embed(
      media_payload: { story_id: story_id.to_s, media_type: "image", image_bytes: candidate_image_bytes },
      face: face
    )
    vector = Array(vector_payload[:vector]).map(&:to_f)
    next if vector.empty?

    match = matcher.match_or_create!(
      account: account,
      profile: profile,
      embedding: vector,
      occurred_at: occurred_at || detected_at || Time.current,
      observation_signature: observation_signature
    )
    person = match[:person]
    update_person_face_attributes_for_event!(person: person, face: face)
    {
      person_id: person.id,
      role: match[:role].to_s,
      label: person.label.to_s.presence,
      similarity: match[:similarity],
      age: face[:age],
      age_range: face[:age_range],
      gender: face[:gender],
      gender_score: face[:gender_score].to_f
    }.compact
  end
rescue StandardError
  []
end
-
-
# Deterministic identity string for one face observation within this event:
# event id, story id, frame/timestamp, and bounding-box corners joined by
# ":" so repeat sightings of the same crop dedupe to the same signature.
def event_face_observation_signature(story_id:, face:)
  box = face[:bounding_box].is_a?(Hash) ? face[:bounding_box] : {}
  components = [
    "event",
    id,
    story_id.to_s,
    face[:frame_index].to_i,
    face[:timestamp_seconds].to_f.round(3),
    box["x1"],
    box["y1"],
    box["x2"],
    box["y2"]
  ]
  components.map(&:to_s).join(":")
end
-
-
# Folds one face observation into the person's metadata["face_attributes"]:
# vote counts for gender and age-range (keeping the modal value as the
# "primary" cue), a rolling window of up to 20 age samples averaged into
# age_estimate, and a last-observed timestamp. Uses update_columns to skip
# validations/callbacks; best-effort (errors swallowed).
def update_person_face_attributes_for_event!(person:, face:)
  return unless person

  metadata = person.metadata.is_a?(Hash) ? person.metadata.deep_dup : {}
  attrs = metadata["face_attributes"].is_a?(Hash) ? metadata["face_attributes"].deep_dup : {}

  gender = face[:gender].to_s.strip.downcase
  if gender.present?
    gender_counts = attrs["gender_counts"].is_a?(Hash) ? attrs["gender_counts"].deep_dup : {}
    gender_counts[gender] = gender_counts[gender].to_i + 1
    attrs["gender_counts"] = gender_counts
    attrs["primary_gender_cue"] = gender_counts.max_by { |_key, count| count.to_i }&.first
  end

  age_range = face[:age_range].to_s.strip
  if age_range.present?
    age_counts = attrs["age_range_counts"].is_a?(Hash) ? attrs["age_range_counts"].deep_dup : {}
    age_counts[age_range] = age_counts[age_range].to_i + 1
    attrs["age_range_counts"] = age_counts
    attrs["primary_age_range"] = age_counts.max_by { |_key, count| count.to_i }&.first
  end

  age_value = face[:age].to_f
  if age_value.positive?
    # Keep the 19 most recent samples plus this one (window of 20).
    samples = Array(attrs["age_samples"]).map(&:to_f).first(19)
    samples << age_value.round(1)
    attrs["age_samples"] = samples
    attrs["age_estimate"] = (samples.sum / samples.length.to_f).round(1)
  end

  attrs["last_observed_at"] = Time.current.iso8601
  metadata["face_attributes"] = attrs
  person.update_columns(metadata: metadata, updated_at: Time.current)
rescue StandardError
  nil
end
-
-
# Collects structured intelligence from the profile's most recent archived
# story events (up to 18 scanned). Each row merges the nested
# local_story_intelligence payload with top-level metadata fallbacks, and
# rows with no usable signal at all are dropped. Returns []; on error too.
def recent_story_intelligence_context(profile)
  return [] unless profile

  profile.instagram_profile_events
    .where(kind: STORY_ARCHIVE_EVENT_KINDS)
    .order(detected_at: :desc, id: :desc)
    .limit(18)
    .map do |event|
      meta = event.metadata.is_a?(Hash) ? event.metadata : {}
      intel = meta["local_story_intelligence"].is_a?(Hash) ? meta["local_story_intelligence"] : {}
      objects = merge_unique_values(intel["objects"], meta["content_signals"]).first(8)
      hashtags = merge_unique_values(intel["hashtags"], meta["hashtags"]).first(8)
      mentions = merge_unique_values(intel["mentions"], meta["mentions"]).first(6)
      profile_handles = merge_unique_values(intel["profile_handles"], meta["profile_handles"]).first(8)
      topics = merge_unique_values(intel["topics"], meta["topics"]).first(8)
      ocr_text = first_present(intel["ocr_text"], meta["ocr_text"])
      transcript = first_present(intel["transcript"], meta["transcript"])
      scenes = normalize_hash_array(intel["scenes"], meta["scenes"]).first(20)
      people = Array(intel["people"] || meta["face_people"]).first(10)
      face_count = (intel["face_count"] || meta["face_count"]).to_i
      # Skip events that produced no signal of any kind.
      next if objects.empty? && hashtags.empty? && mentions.empty? && profile_handles.empty? && topics.empty? && scenes.empty? && ocr_text.blank? && transcript.blank? && face_count <= 0

      {
        event_id: event.id,
        occurred_at: event.occurred_at&.iso8601 || event.detected_at&.iso8601,
        topics: topics,
        objects: objects,
        scenes: scenes,
        hashtags: hashtags,
        mentions: mentions,
        profile_handles: profile_handles,
        # byteslice keeps prompt context bounded.
        ocr_text: ocr_text.to_s.byteslice(0, 220),
        transcript: transcript.to_s.byteslice(0, 220),
        face_count: face_count,
        scenes_count: scenes.length,
        people: people
      }
    end.compact
rescue StandardError
  []
end
-
-
# Renders up to 10 story-intelligence rows as a compact bullet list
# ("- topics=a,b | faces=2 | ..."), skipping empty fields. Returns "" when
# there are no rows.
def format_story_intelligence_context(rows)
  entries = Array(rows).first(10)
  return "" if entries.empty?

  lines = entries.map do |row|
    parts = []
    parts << "topics=#{Array(row[:topics]).join(',')}" if Array(row[:topics]).any?
    parts << "objects=#{Array(row[:objects]).join(',')}" if Array(row[:objects]).any?
    parts << "hashtags=#{Array(row[:hashtags]).join(',')}" if Array(row[:hashtags]).any?
    parts << "mentions=#{Array(row[:mentions]).join(',')}" if Array(row[:mentions]).any?
    parts << "handles=#{Array(row[:profile_handles]).join(',')}" if Array(row[:profile_handles]).any?
    parts << "faces=#{row[:face_count].to_i}" if row[:face_count].to_i.positive?
    parts << "scenes=#{row[:scenes_count].to_i}" if row[:scenes_count].to_i.positive?
    parts << "ocr=#{row[:ocr_text]}" if row[:ocr_text].to_s.present?
    parts << "transcript=#{row[:transcript]}" if row[:transcript].to_s.present?
    "- #{parts.join(' | ')}"
  end

  "Recent structured story intelligence:\n#{lines.join("\n")}"
end
-
-
# Concatenates the profile narrative, structured story intelligence,
# analyzed post history, and preparation summary into one newline-joined
# context string, capped at 650 bytes for prompt budget.
def build_compact_historical_context(profile:, historical_story_context:, verified_profile_history:, profile_preparation:)
  sections = []
  sections << profile.history_narrative_text(max_chunks: 2).to_s if profile
  sections << format_story_intelligence_context(historical_story_context).to_s
  sections << format_verified_profile_history(verified_profile_history)
  sections << format_profile_preparation(profile_preparation)

  joined = sections
    .map { |section| section.to_s.strip }
    .reject(&:empty?)
    .join("\n")

  joined.byteslice(0, 650)
end
-
-
# Reads the cached comment-generation preparation payload from the
# profile's behavior profile metadata. Returns a symbolized hash, or {} when
# missing/malformed/erroring.
def latest_profile_comment_preparation(profile)
  meta = profile&.instagram_profile_behavior_profile&.metadata
  payload = meta.is_a?(Hash) ? meta["comment_generation_preparation"] : nil
  payload.is_a?(Hash) ? payload.deep_symbolize_keys : {}
rescue StandardError
  {}
end
-
-
# Summarizes the profile's 12 most recent posts that have either analysis
# data or detected faces. Text fields are byte-capped and list fields
# deduped/truncated for prompt context. Returns []; on error too.
def recent_analyzed_profile_history(profile)
  return [] unless profile

  profile.instagram_profile_posts
    .recent_first
    .limit(12)
    .map do |post|
      analysis = post.analysis.is_a?(Hash) ? post.analysis : {}
      faces = post.instagram_post_faces
      # Skip posts with neither analysis nor face data.
      next if analysis.blank? && !faces.exists?

      {
        post_id: post.id,
        shortcode: post.shortcode,
        taken_at: post.taken_at&.iso8601,
        caption: post.caption.to_s.byteslice(0, 220),
        image_description: analysis["image_description"].to_s.byteslice(0, 220),
        topics: Array(analysis["topics"]).map(&:to_s).reject(&:blank?).uniq.first(8),
        objects: Array(analysis["objects"]).map(&:to_s).reject(&:blank?).uniq.first(8),
        hashtags: Array(analysis["hashtags"]).map(&:to_s).reject(&:blank?).uniq.first(8),
        mentions: Array(analysis["mentions"]).map(&:to_s).reject(&:blank?).uniq.first(8),
        face_count: faces.count,
        primary_face_count: faces.where(role: "primary_user").count,
        secondary_face_count: faces.where(role: "secondary_person").count
      }
    end.compact
rescue StandardError
  []
end
-
-
# Aggregates a "voice" fingerprint for the profile: author type, tags, bio
# keywords, recurring topics/hashtags/people from behavioral + historical
# data, prior generated comments as style examples, and identity-consistency
# signals from the preparation payload. Returns {} on any error.
def build_conversational_voice_profile(profile:, historical_story_context:, verified_profile_history:, profile_preparation:)
  behavior_summary = profile&.instagram_profile_behavior_profile&.behavioral_summary
  behavior_summary = {} unless behavior_summary.is_a?(Hash)
  preparation = profile_preparation.is_a?(Hash) ? profile_preparation : {}
  recent_comments = recent_llm_comments_for_profile(profile).first(6)
  recent_topics = Array(verified_profile_history).flat_map { |row| Array(row[:topics]) }.map(&:to_s).reject(&:blank?).uniq.first(10)
  recurring_story_topics = Array(historical_story_context).flat_map { |row| Array(row[:topics]) }.map(&:to_s).reject(&:blank?).uniq.first(10)

  {
    author_type: determine_author_type(profile),
    profile_tags: profile ? profile.profile_tags.pluck(:name).sort.first(10) : [],
    bio_keywords: extract_topics_from_profile(profile).first(10),
    recurring_topics: (recent_topics + recurring_story_topics + Array(behavior_summary["topic_clusters"]).map(&:first)).map(&:to_s).reject(&:blank?).uniq.first(12),
    recurring_hashtags: Array(behavior_summary["top_hashtags"]).map(&:first).map(&:to_s).reject(&:blank?).first(10),
    frequent_people_labels: Array(behavior_summary["frequent_secondary_persons"]).map { |row| row.is_a?(Hash) ? row["label"] || row[:label] : nil }.map(&:to_s).reject(&:blank?).uniq.first(8),
    prior_comment_examples: recent_comments.map { |value| value.to_s.byteslice(0, 120) },
    identity_consistency: preparation[:identity_consistency].is_a?(Hash) ? preparation[:identity_consistency] : preparation["identity_consistency"],
    profile_preparation_reason: preparation[:reason].to_s.presence || preparation["reason"].to_s.presence
  }.compact
rescue StandardError
  {}
end
-
-
# Renders up to 8 analyzed-post rows as a compact bullet list (mirrors
# format_story_intelligence_context). Returns "" when there are no rows.
def format_verified_profile_history(rows)
  entries = Array(rows).first(8)
  return "" if entries.empty?

  lines = entries.map do |row|
    parts = []
    parts << "shortcode=#{row[:shortcode]}" if row[:shortcode].to_s.present?
    parts << "topics=#{Array(row[:topics]).join(',')}" if Array(row[:topics]).any?
    parts << "objects=#{Array(row[:objects]).join(',')}" if Array(row[:objects]).any?
    parts << "hashtags=#{Array(row[:hashtags]).join(',')}" if Array(row[:hashtags]).any?
    parts << "mentions=#{Array(row[:mentions]).join(',')}" if Array(row[:mentions]).any?
    parts << "faces=#{row[:face_count].to_i}" if row[:face_count].to_i.positive?
    parts << "primary_faces=#{row[:primary_face_count].to_i}" if row[:primary_face_count].to_i.positive?
    parts << "secondary_faces=#{row[:secondary_face_count].to_i}" if row[:secondary_face_count].to_i.positive?
    parts << "desc=#{row[:image_description]}" if row[:image_description].to_s.present?
    "- #{parts.join(' | ')}"
  end

  "Recent analyzed profile posts:\n#{lines.join("\n")}"
end
-
-
# Renders the preparation payload as a one-line summary. Tolerates both
# symbol and string keys throughout. Returns "" for non-hash/blank input.
def format_profile_preparation(payload)
  data = payload.is_a?(Hash) ? payload : {}
  return "" if data.blank?

  identity = data[:identity_consistency].is_a?(Hash) ? data[:identity_consistency] : data["identity_consistency"]
  analysis = data[:analysis].is_a?(Hash) ? data[:analysis] : data["analysis"]

  parts = []
  parts << "ready=#{ActiveModel::Type::Boolean.new.cast(data[:ready_for_comment_generation] || data["ready_for_comment_generation"])}"
  parts << "reason=#{data[:reason_code] || data["reason_code"]}"
  parts << "analyzed_posts=#{analysis[:analyzed_posts_count] || analysis["analyzed_posts_count"]}" if analysis.is_a?(Hash)
  parts << "structured_posts=#{analysis[:posts_with_structured_signals_count] || analysis["posts_with_structured_signals_count"]}" if analysis.is_a?(Hash)
  if identity.is_a?(Hash)
    parts << "identity_consistent=#{ActiveModel::Type::Boolean.new.cast(identity[:consistent] || identity["consistent"])}"
    parts << "identity_ratio=#{identity[:dominance_ratio] || identity["dominance_ratio"]}"
    parts << "identity_reason=#{identity[:reason_code] || identity["reason_code"]}"
  end
  return "" if parts.empty?

  "Profile preparation: #{parts.join(' | ')}"
end
-
-
# Three timeline anchors for this story event, each with metadata-first
# fallbacks: when it was posted, when we downloaded it, when we detected it.
def story_timeline_data
  raw = metadata.is_a?(Hash) ? metadata : {}
  story = instagram_stories.order(taken_at: :desc, id: :desc).first
  posted_at = raw["upload_time"].presence || raw["taken_at"].presence || story&.taken_at&.iso8601
  downloaded_at = raw["downloaded_at"].presence || occurred_at&.iso8601 || created_at&.iso8601

  {
    story_posted_at: posted_at,
    downloaded_to_system_at: downloaded_at,
    event_detected_at: detected_at&.iso8601
  }
end
-
-
# Rough ETA (seconds) for an LLM comment-generation run, used by progress
# broadcasts. Combines a base cost, the current "ai" Sidekiq queue depth,
# prior attempts, and a preprocessing penalty; clamped to 10..240.
#
# queue_state: true while still queued (count every pending job), false once
# running (this job is one of the pending jobs, so discount one).
def estimated_generation_seconds(queue_state:)
  base = 18
  queue_size =
    begin
      require "sidekiq/api"
      Sidekiq::Queue.new("ai").size.to_i
    rescue StandardError, LoadError
      # LoadError is NOT a StandardError: without rescuing it explicitly a
      # deployment on the Solid Queue backend (no sidekiq gem installed)
      # would crash here instead of falling back to 0.
      0
    end
  queue_factor = queue_state ? queue_size * 4 : [queue_size - 1, 0].max * 3
  attempt_factor = llm_comment_attempts.to_i * 6
  preprocess_factor = local_context_preprocess_penalty
  (base + queue_factor + attempt_factor + preprocess_factor).clamp(10, 240)
end
-
-
# Extra ETA seconds when no local CV/OCR context exists yet and must be
# computed first: 16s for images, 8s otherwise; 0 when context is present
# or anything raises.
def local_context_preprocess_penalty
  raw = metadata.is_a?(Hash) ? metadata : {}
  has_context = raw["local_story_intelligence"].is_a?(Hash) ||
    raw["ocr_text"].to_s.present? ||
    Array(raw["content_signals"]).any?
  return 0 if has_context

  media_type = media&.blob&.content_type.to_s.presence || raw["media_content_type"].to_s
  media_type.start_with?("image/") ? 16 : 8
rescue StandardError
  0
end
-
-
# Up to 12 previously generated comments for this profile (excluding this
# event), newest generation first — used as style examples. Returns []; on
# error too.
def recent_llm_comments_for_profile(profile)
  return [] unless profile

  profile.instagram_profile_events
    .where.not(id: id)
    .where.not(llm_generated_comment: [nil, ""])
    .order(llm_comment_generated_at: :desc, id: :desc)
    .limit(12)
    .pluck(:llm_generated_comment)
    .map(&:to_s)
    .reject(&:blank?)
rescue StandardError
  []
end
-
-
# Normalizes the raw local-intelligence payload into the fixed evidence
# shape used downstream: strings coerced, arrays truncated, nested
# detections/blocks filtered to hashes. Tolerates any non-Hash input.
def build_cv_ocr_evidence(local_story_intelligence:)
  payload = local_story_intelligence.is_a?(Hash) ? local_story_intelligence : {}
  {
    source: payload[:source].to_s,
    reason: payload[:reason].to_s.presence,
    ocr_text: payload[:ocr_text].to_s,
    transcript: payload[:transcript].to_s,
    objects: Array(payload[:objects]).first(20),
    scenes: Array(payload[:scenes]).first(20),
    hashtags: Array(payload[:hashtags]).first(20),
    mentions: Array(payload[:mentions]).first(20),
    profile_handles: Array(payload[:profile_handles]).first(20),
    source_account_reference: payload[:source_account_reference].to_s,
    source_profile_ids: Array(payload[:source_profile_ids]).first(10),
    media_type: payload[:media_type].to_s,
    face_count: payload[:face_count].to_i,
    people: Array(payload[:people]).first(10),
    object_detections: normalize_hash_array(payload[:object_detections]).first(30),
    ocr_blocks: normalize_hash_array(payload[:ocr_blocks]).first(30)
  }
end
-
-
# Diffs the current story's signals against historical story context:
# set-intersections give "shared"/"recurring" lists, set-differences give
# "novel" lists (each capped at 12). Historical rows may use symbol or
# string keys. has_historical_overlap is true when any overlap exists
# across topics/objects/scenes/hashtags/handles (people/mentions excluded).
def build_historical_comparison(current:, historical_story_context:)
  current_hash = current.is_a?(Hash) ? current : {}
  current_topics = Array(current_hash[:topics]).map(&:to_s).reject(&:blank?).uniq
  current_objects = Array(current_hash[:objects]).map(&:to_s).reject(&:blank?).uniq
  # Scenes may be hashes ({type: ...}) or plain values; compare by type name.
  current_scenes = Array(current_hash[:scenes]).map { |row| row.is_a?(Hash) ? row[:type] || row["type"] : row }.map(&:to_s).reject(&:blank?).uniq
  current_hashtags = Array(current_hash[:hashtags]).map(&:to_s).reject(&:blank?).uniq
  current_mentions = Array(current_hash[:mentions]).map(&:to_s).reject(&:blank?).uniq
  current_profile_handles = Array(current_hash[:profile_handles]).map(&:to_s).reject(&:blank?).uniq
  current_people = Array(current_hash[:people]).map { |row| row.is_a?(Hash) ? row[:person_id] || row["person_id"] : nil }.compact.map(&:to_s)

  historical_rows = Array(historical_story_context)
  hist_topics = historical_rows.flat_map { |row| Array(row[:topics] || row["topics"]) }.map(&:to_s).reject(&:blank?).uniq
  hist_objects = historical_rows.flat_map { |row| Array(row[:objects] || row["objects"]) }.map(&:to_s).reject(&:blank?).uniq
  hist_scenes = historical_rows.flat_map { |row| Array(row[:scenes] || row["scenes"]) }
    .map { |row| row.is_a?(Hash) ? row[:type] || row["type"] : row }
    .map(&:to_s)
    .reject(&:blank?)
    .uniq
  hist_hashtags = historical_rows.flat_map { |row| Array(row[:hashtags] || row["hashtags"]) }.map(&:to_s).reject(&:blank?).uniq
  hist_mentions = historical_rows.flat_map { |row| Array(row[:mentions] || row["mentions"]) }.map(&:to_s).reject(&:blank?).uniq
  hist_profile_handles = historical_rows.flat_map { |row| Array(row[:profile_handles] || row["profile_handles"]) }.map(&:to_s).reject(&:blank?).uniq
  hist_people = historical_rows.flat_map { |row| Array(row[:people] || row["people"]) }
    .map { |row| row.is_a?(Hash) ? row[:person_id] || row["person_id"] : nil }
    .compact
    .map(&:to_s)
    .uniq

  {
    shared_topics: (current_topics & hist_topics).first(12),
    novel_topics: (current_topics - hist_topics).first(12),
    shared_objects: (current_objects & hist_objects).first(12),
    novel_objects: (current_objects - hist_objects).first(12),
    shared_scenes: (current_scenes & hist_scenes).first(12),
    novel_scenes: (current_scenes - hist_scenes).first(12),
    recurring_hashtags: (current_hashtags & hist_hashtags).first(12),
    recurring_mentions: (current_mentions & hist_mentions).first(12),
    recurring_profile_handles: (current_profile_handles & hist_profile_handles).first(12),
    recurring_people_ids: (current_people & hist_people).first(12),
    has_historical_overlap: ((current_topics & hist_topics).any? || (current_objects & hist_objects).any? || (current_scenes & hist_scenes).any? || (current_hashtags & hist_hashtags).any? || (current_profile_handles & hist_profile_handles).any?)
  }
end
-
-
# Flattens any mix of nils, scalars, arrays, and hashes into a flat array
# of Hashes, dropping everything that is not a Hash.
#
# Fix: the previous `Array(value)` exploded a bare Hash into key/value
# pairs (Kernel#Array calls #to_a on Hash), silently dropping it; a bare
# Hash argument is now kept as a single-element contribution.
def normalize_hash_array(*values)
  values
    .flat_map { |value| value.is_a?(Hash) ? [value] : Array(value) }
    .select { |row| row.is_a?(Hash) }
end
-
-
# Normalizes person-match rows (symbol or string keyed) into a canonical
# symbol-keyed shape, dropping non-hashes and nil-valued fields via
# #compact, then deduping on (person_id, role, rounded similarity, label).
def normalize_people_rows(*values)
  rows = values.flat_map { |value| Array(value) }

  rows.filter_map do |row|
    next unless row.is_a?(Hash)

    {
      person_id: row[:person_id] || row["person_id"],
      role: (row[:role] || row["role"]).to_s.presence,
      label: (row[:label] || row["label"]).to_s.presence,
      similarity: (row[:similarity] || row["similarity"] || row[:match_similarity] || row["match_similarity"]).to_f,
      relationship: (row[:relationship] || row["relationship"]).to_s.presence,
      appearances: (row[:appearances] || row["appearances"]).to_i,
      linked_usernames: Array(row[:linked_usernames] || row["linked_usernames"]).map(&:to_s).reject(&:blank?).first(8),
      # Age only kept when positive; zero/absent becomes nil and is compacted away.
      age: (row[:age] || row["age"]).to_f.positive? ? (row[:age] || row["age"]).to_f.round(1) : nil,
      age_range: (row[:age_range] || row["age_range"]).to_s.presence,
      gender: (row[:gender] || row["gender"]).to_s.presence,
      gender_score: (row[:gender_score] || row["gender_score"]).to_f
    }.compact
  end.uniq { |row| [ row[:person_id], row[:role], row[:similarity].to_f.round(3), row[:label] ] }
end
-
-
# Normalizes object-detection rows to {label, confidence, bbox, timestamps},
# accepting several source key spellings, then dedupes, sorts by descending
# confidence, and truncates to `limit` (clamped to 1..300).
def normalize_object_detections(*values, limit: 120)
  rows = normalize_hash_array(*values).map do |row|
    label = (row[:label] || row["label"] || row[:description] || row["description"]).to_s.downcase.strip
    next if label.blank?

    {
      label: label,
      confidence: (row[:confidence] || row["confidence"] || row[:score] || row["score"] || row[:max_confidence] || row["max_confidence"]).to_f,
      bbox: row[:bbox].is_a?(Hash) ? row[:bbox] : (row["bbox"].is_a?(Hash) ? row["bbox"] : {}),
      timestamps: Array(row[:timestamps] || row["timestamps"]).map(&:to_f).first(80)
    }
  end.compact

  rows
    .uniq { |row| [ row[:label], row[:bbox], row[:timestamps].first(6) ] }
    .sort_by { |row| -row[:confidence].to_f }
    .first(limit.to_i.clamp(1, 300))
end
-
-
# True when a story should be kept out of the profile narrative: either its
# ownership label marks it as third-party/reshared content, or the
# generation policy denied commenting for a reshare/third-party/unrelated/
# meme reason. Tolerates symbol or string keys in both hashes.
def story_excluded_from_narrative?(ownership:, policy:)
  ownership_hash = ownership.is_a?(Hash) ? ownership : {}
  policy_hash = policy.is_a?(Hash) ? policy : {}
  label = (ownership_hash[:label] || ownership_hash["label"]).to_s
  return true if %w[reshare third_party_content unrelated_post meme_reshare].include?(label)

  # key? check (rather than ||) so an explicit false symbol value is not
  # overridden by the string key.
  allow_comment_value = if policy_hash.key?(:allow_comment)
    policy_hash[:allow_comment]
  else
    policy_hash["allow_comment"]
  end
  allow_comment = ActiveModel::Type::Boolean.new.cast(allow_comment_value)
  reason_code = (policy_hash[:reason_code] || policy_hash["reason_code"]).to_s
  !allow_comment && reason_code.match?(/(reshare|third_party|unrelated|meme)/)
end
-
-
# Extracts the handle of the account a story came from: prefers an explicit
# story_ref (trailing ":" stripped), else parses the username out of a
# story/permalink URL. Returns nil when nothing usable exists.
def extract_source_account_reference(raw:, story_meta:)
  value = raw["story_ref"].to_s.presence || story_meta["story_ref"].to_s.presence
  value = value.delete_suffix(":") if value.to_s.present?
  return value if value.to_s.present?

  url = raw["story_url"].to_s.presence || raw["permalink"].to_s.presence || story_meta["story_url"].to_s.presence
  return nil if url.blank?

  # Prefer the /stories/<user>/ form; fall back to a bare profile URL.
  match = url.match(%r{instagram\.com/stories/([a-zA-Z0-9._]+)/?}i) || url.match(%r{instagram\.com/([a-zA-Z0-9._]+)/?}i)
  match ? match[1].to_s.downcase : nil
end
-
-
# Collects candidate numeric profile IDs from known metadata keys plus any
# 5+ digit runs embedded in the story_id. Returns up to 10 unique strings.
def extract_source_profile_ids_from_metadata(raw:, story_meta:)
  rows = []
  %w[source_profile_id owner_id profile_id user_id source_user_id].each do |key|
    value = raw[key] || story_meta[key]
    rows << value.to_s if value.to_s.match?(/\A\d+\z/)
  end
  story_id = raw["story_id"].to_s.presence || story_meta["story_id"].to_s
  story_id.to_s.scan(/(?<!\w)\d{5,}(?!\w)/).each { |token| rows << token }
  rows.uniq.first(10)
end
-
-
# Coarse author classification from bio keywords: "creator", "business",
# "personal" (default), or "unknown" when no profile is given. Creator
# keywords win over business ones when both appear.
def determine_author_type(profile)
  return "unknown" unless profile

  bio_text = profile.bio.to_s.downcase
  case bio_text
  when /creator|artist/ then "creator"
  when /business|entrepreneur/ then "business"
  else "personal"
  end
end
-
-
# Maps bio text to a list of topic labels: a topic is included when any of
# its keywords appears (substring match) in the downcased bio. Returns []
# when there is no profile or no bio.
def extract_topics_from_profile(profile)
  return [] unless profile&.bio

  bio_text = profile.bio.downcase
  keyword_map = {
    "fitness" => %w[fitness gym workout health],
    "food" => %w[food cooking chef recipe],
    "travel" => %w[travel wanderlust adventure],
    "fashion" => %w[fashion style outfit beauty],
    "tech" => %w[tech technology coding software],
    "art" => %w[art artist creative design],
    "business" => %w[business entrepreneur startup],
    "photography" => %w[photography photo camera]
  }

  keyword_map.keys.select do |topic|
    keyword_map[topic].any? { |word| bio_text.include?(word) }
  end
end
-
end
-
require 'active_support/concern'
-
-
module InstagramProfileEvent::Broadcastable
-
extend ActiveSupport::Concern
-
-
included do
-
# Announces on the account's LLM channel that generation was queued for this
# event, with an ETA and a nominal 5% progress. Best-effort: no-ops without
# an account and swallows broadcast errors.
def broadcast_llm_comment_generation_queued(job_id: nil)
  account = instagram_profile&.instagram_account
  return unless account

  ActionCable.server.broadcast(
    "llm_comment_generation_#{account.id}",
    {
      event_id: id,
      status: "queued",
      job_id: job_id.to_s.presence || llm_comment_job_id,
      message: "Comment generation queued",
      estimated_seconds: estimated_generation_seconds(queue_state: true),
      progress: 5
    }
  )
rescue StandardError
  nil
end
-
# Pushes the final "completed" payload (generated comment + model/provider
# details) to the account's LLM channel. Best-effort: errors swallowed.
def broadcast_llm_comment_generation_update(generation_result)
  account = instagram_profile&.instagram_account
  return unless account

  ActionCable.server.broadcast(
    "llm_comment_generation_#{account.id}",
    {
      event_id: id,
      status: "completed",
      comment: llm_generated_comment,
      generated_at: llm_comment_generated_at,
      model: llm_comment_model,
      provider: llm_comment_provider,
      relevance_score: llm_comment_relevance_score,
      generation_result: generation_result
    }
  )
rescue StandardError
  nil
end
-
# Announces that generation actually started (status "started", nominal 12%
# progress, refreshed ETA). Best-effort: errors swallowed.
def broadcast_llm_comment_generation_start
  account = instagram_profile&.instagram_account
  return unless account

  ActionCable.server.broadcast(
    "llm_comment_generation_#{account.id}",
    {
      event_id: id,
      status: "started",
      message: "Generating comment...",
      estimated_seconds: estimated_generation_seconds(queue_state: false),
      progress: 12
    }
  )
rescue StandardError
  nil
end
-
# Pushes an "error" status with the failure message to the per-account
# channel. Broadcast failures themselves are swallowed.
def broadcast_llm_comment_generation_error(error_message)
  account = instagram_profile&.instagram_account
  return if account.nil?

  error_payload = {
    event_id: id,
    status: "error",
    error: error_message,
    message: "Failed to generate comment"
  }
  ActionCable.server.broadcast("llm_comment_generation_#{account.id}", error_payload)
rescue StandardError
  nil
end
-
# Pushes a "skipped" status (e.g. policy-blocked or intelligence unavailable).
# nil reason/source entries are dropped via compact. Errors are swallowed.
def broadcast_llm_comment_generation_skipped(message:, reason: nil, source: nil)
  account = instagram_profile&.instagram_account
  return if account.nil?

  skipped_payload = {
    event_id: id,
    status: "skipped",
    message: message.to_s.presence || "Comment generation skipped",
    reason: reason.to_s.presence,
    source: source.to_s.presence
  }.compact
  ActionCable.server.broadcast("llm_comment_generation_#{account.id}", skipped_payload)
rescue StandardError
  nil
end
-
# Intermediate "running" progress update; progress is clamped to 0..100.
# Best-effort broadcast; failures are swallowed.
def broadcast_llm_comment_generation_progress(stage:, message:, progress:)
  account = instagram_profile&.instagram_account
  return if account.nil?

  progress_payload = {
    event_id: id,
    status: "running",
    stage: stage.to_s,
    message: message.to_s,
    progress: progress.to_i.clamp(0, 100),
    estimated_seconds: estimated_generation_seconds(queue_state: false)
  }
  ActionCable.server.broadcast("llm_comment_generation_#{account.id}", progress_payload)
rescue StandardError
  nil
end
-
# Class-level helper: replaces the hidden refresh-signal partial on the
# account's story-archive Turbo stream, prompting connected clients to
# reload the archive. No-op when account is nil; errors are swallowed.
def self.broadcast_story_archive_refresh!(account:)
  return if account.nil?

  stream_locals = { refreshed_at: Time.current }
  Turbo::StreamsChannel.broadcast_replace_to(
    [account, :story_archive],
    target: "story_media_archive_refresh_signal",
    partial: "instagram_accounts/story_archive_refresh_signal",
    locals: stream_locals
  )
rescue StandardError
  nil
end
-
# Enqueues a job to refresh the owning account's audit-log view (capped at
# 120 rows). Best-effort: enqueue failures are swallowed.
def broadcast_account_audit_logs_refresh
  owning_account = instagram_profile&.instagram_account
  return if owning_account.nil?

  RefreshAccountAuditLogsJob.enqueue_for(instagram_account_id: owning_account.id, limit: 120)
rescue StandardError
  nil
end
-
# Instance-level wrapper: only events whose kind is listed in
# STORY_ARCHIVE_EVENT_KINDS trigger the class-level archive refresh.
# Errors are swallowed.
def broadcast_story_archive_refresh
  current_kind = kind.to_s
  return unless STORY_ARCHIVE_EVENT_KINDS.include?(current_kind)

  owning_account = instagram_profile&.instagram_account
  self.class.broadcast_story_archive_refresh!(account: owning_account)
rescue StandardError
  nil
end
-
# Notifies the ops live-update channel that this profile's events changed.
# Throttled per profile via throttle_key; failures are swallowed.
def broadcast_profile_events_refresh
  target_account_id = instagram_profile&.instagram_account_id
  return if target_account_id.nil?

  change_payload = { profile_id: instagram_profile_id, event_id: id }
  Ops::LiveUpdateBroadcaster.broadcast!(
    topic: "profile_events_changed",
    account_id: target_account_id,
    payload: change_payload,
    throttle_key: "profile_events_changed:#{instagram_profile_id}"
  )
rescue StandardError
  nil
end
-
-
end
-
end
-
require 'active_support/concern'
-
-
module InstagramProfileEvent::CommentGenerationCoordinator
-
extend ActiveSupport::Concern
-
-
included do
-
# True when a non-blank generated comment is stored on this event.
# (Name kept for caller compatibility despite the non-idiomatic has_ prefix.)
def has_llm_generated_comment?
  stored_comment = llm_generated_comment
  stored_comment.present?
end
-
# True while generation is pending or actively running.
def llm_comment_in_progress?
  case llm_comment_status.to_s
  when "queued", "running" then true
  else false
  end
end
-
# Transitions the event to "queued", recording the job id (keeping any
# previously stored id when none is given) and clearing the last error,
# then broadcasts the queued status. Raises if the update fails.
def queue_llm_comment_generation!(job_id: nil)
  resolved_job_id = job_id.to_s.presence || llm_comment_job_id
  update!(
    llm_comment_status: "queued",
    llm_comment_job_id: resolved_job_id,
    llm_comment_last_error: nil
  )
  broadcast_llm_comment_generation_queued(job_id: job_id)
end
-
# Transitions the event to "running", bumps the attempt counter, clears the
# last error, then broadcasts the started status. Raises if the update fails.
def mark_llm_comment_running!(job_id: nil)
  resolved_job_id = job_id.to_s.presence || llm_comment_job_id
  update!(
    llm_comment_status: "running",
    llm_comment_job_id: resolved_job_id,
    llm_comment_attempts: llm_comment_attempts.to_i + 1,
    llm_comment_last_error: nil
  )
  broadcast_llm_comment_generation_start
end
-
# Records a failure: status "failed", last error message, and a structured
# "last_failure" entry merged into llm_comment_metadata, then broadcasts the
# error. Best-effort — persistence/broadcast failures are swallowed.
def mark_llm_comment_failed!(error:)
  failure_entry = {
    "error_class" => error.class.name,
    "error_message" => error.message.to_s,
    "failed_at" => Time.current.iso8601
  }
  existing_metadata = llm_comment_metadata.is_a?(Hash) ? llm_comment_metadata : {}

  update!(
    llm_comment_status: "failed",
    llm_comment_last_error: error.message.to_s,
    llm_comment_metadata: existing_metadata.merge("last_failure" => failure_entry)
  )
  broadcast_llm_comment_generation_error(error.message)
rescue StandardError
  nil
end
-
# Records a skip: status "skipped" plus a structured "last_failure" entry and
# a local-intelligence status ("policy_blocked" when the verified story
# policy blocked generation, otherwise "unavailable"), then broadcasts the
# skipped status. Best-effort — failures are swallowed.
def mark_llm_comment_skipped!(message:, reason: nil, source: nil)
  intel_status = source.to_s == "validated_story_policy" ? "policy_blocked" : "unavailable"
  skip_details = {
    "error_class" => "LocalStoryIntelligenceUnavailableError",
    "error_message" => message.to_s,
    "failed_at" => Time.current.iso8601,
    "reason" => reason.to_s.presence,
    "source" => source.to_s.presence
  }.compact

  existing_metadata = llm_comment_metadata.is_a?(Hash) ? llm_comment_metadata : {}
  update!(
    llm_comment_status: "skipped",
    llm_comment_last_error: message.to_s,
    llm_comment_metadata: existing_metadata.merge(
      "last_failure" => skip_details,
      "local_story_intelligence_status" => intel_status
    )
  )

  broadcast_llm_comment_generation_skipped(
    message: message.to_s,
    reason: reason,
    source: source
  )
rescue StandardError
  nil
end
-
# Runs the full local comment-generation pipeline for this event and persists
# the selected comment.
#
# Idempotent: when a comment already exists the status is normalized to
# "completed" (via update_columns, skipping callbacks/validations) and an
# "already_completed" result is returned without re-generating.
#
# @param provider [Symbol] recorded verbatim as llm_comment_provider (default :local)
# @param model [String, nil] forwarded to the local generator; nil lets it pick
# @return [Hash] the generator result merged with :technical_details,
#   :selected_comment, :relevance_score and :ranked_candidates
# @raise [LocalStoryIntelligenceUnavailableError] when local story intelligence
#   is blank or the verified story policy disallows commenting
# @raise [RuntimeError] when the model yields no usable suggestions
def generate_llm_comment!(provider: :local, model: nil)
  # Fast path: comment already present — normalize status and return.
  if has_llm_generated_comment?
    update_columns(
      llm_comment_status: "completed",
      llm_comment_last_error: nil,
      updated_at: Time.current
    )

    return {
      status: "already_completed",
      selected_comment: llm_generated_comment,
      relevance_score: llm_comment_relevance_score
    }
  end

  # Monotonic clock for the processing_ms metric (inline rescue keeps the
  # pipeline alive even if the clock call fails).
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC) rescue nil
  # Build the full context, then persist the validated insights and local
  # intelligence BEFORE any gating, so evidence survives even when we bail.
  context = build_comment_context
  local_intel = context[:local_story_intelligence].is_a?(Hash) ? context[:local_story_intelligence] : {}
  validated_story_insights = context[:validated_story_insights].is_a?(Hash) ? context[:validated_story_insights] : {}
  generation_policy = validated_story_insights[:generation_policy].is_a?(Hash) ? validated_story_insights[:generation_policy] : {}
  persist_validated_story_insights!(validated_story_insights)
  persist_local_story_intelligence!(local_intel)
  # Gate 1: no usable local intelligence → refuse to generate.
  if local_story_intelligence_blank?(local_intel)
    reason = local_intel[:reason].to_s.presence || "local_story_intelligence_blank"
    source = local_intel[:source].to_s.presence || "unknown"
    raise LocalStoryIntelligenceUnavailableError.new(
      "Local story intelligence unavailable (reason: #{reason}, source: #{source}).",
      reason: reason,
      source: source
    )
  end
  # Gate 2: verified story policy must explicitly allow commenting.
  unless ActiveModel::Type::Boolean.new.cast(generation_policy[:allow_comment])
    policy_reason_code = generation_policy[:reason_code].to_s.presence || "policy_blocked"
    policy_reason = generation_policy[:reason].to_s.presence || "Comment generation blocked by verified story policy."
    raise LocalStoryIntelligenceUnavailableError.new(
      policy_reason,
      reason: policy_reason_code,
      source: "validated_story_policy"
    )
  end
  broadcast_llm_comment_generation_progress(stage: "context_ready", message: "Context prepared from local story intelligence.", progress: 20)
  technical_details = capture_technical_details(context)
  broadcast_llm_comment_generation_progress(stage: "model_running", message: "Generating suggestions with local model.", progress: 55)

  # Run the local model through the engagement-comment generator.
  generator = Ai::LocalEngagementCommentGenerator.new(
    ollama_client: Ai::OllamaClient.new,
    model: model
  )

  result = generator.generate!(
    post_payload: context[:post_payload],
    image_description: context[:image_description],
    topics: context[:topics],
    author_type: context[:author_type],
    historical_comments: context[:historical_comments],
    historical_context: context[:historical_context],
    historical_story_context: context[:historical_story_context],
    local_story_intelligence: context[:local_story_intelligence],
    historical_comparison: context[:historical_comparison],
    cv_ocr_evidence: context[:cv_ocr_evidence],
    verified_story_facts: context[:verified_story_facts],
    story_ownership_classification: context[:story_ownership_classification],
    generation_policy: context[:generation_policy],
    profile_preparation: context[:profile_preparation],
    verified_profile_history: context[:verified_profile_history],
    conversational_voice: context[:conversational_voice]
  )
  enhanced_result = result.merge(technical_details: technical_details)

  # Gate 3: only statuses in LLM_SUCCESS_STATUSES count as success —
  # fallbacks are deliberately blocked.
  unless LLM_SUCCESS_STATUSES.include?(result[:status].to_s)
    raise "Local pipeline did not produce valid model suggestions (fallback blocked): #{result[:error_message]}"
  end

  # Rank suggestions by relevance and pick the best one.
  ranked = Ai::CommentRelevanceScorer.rank(
    suggestions: result[:comment_suggestions],
    image_description: context[:image_description],
    topics: context[:topics],
    historical_comments: context[:historical_comments]
  )

  selected_comment, score = ranked.first
  raise "No valid comment suggestions generated" if selected_comment.to_s.blank?

  duration_ms =
    if started_at
      ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000.0).round
    end

  # Persist the winning comment plus a full audit trail in metadata.
  update!(
    llm_generated_comment: selected_comment,
    llm_comment_generated_at: Time.current,
    llm_comment_model: result[:model],
    llm_comment_provider: provider.to_s,
    llm_comment_status: "completed",
    llm_comment_relevance_score: score,
    llm_comment_last_error: nil,
    llm_comment_metadata: (llm_comment_metadata.is_a?(Hash) ? llm_comment_metadata : {}).merge(
      "prompt" => result[:prompt],
      "source" => result[:source],
      "fallback_used" => ActiveModel::Type::Boolean.new.cast(result[:fallback_used]),
      "generation_status" => result[:status],
      "technical_details" => technical_details,
      "local_story_intelligence" => context[:local_story_intelligence],
      "historical_story_context_used" => Array(context[:historical_story_context]).first(12),
      "historical_comparison" => context[:historical_comparison],
      "cv_ocr_evidence" => context[:cv_ocr_evidence],
      "verified_story_facts" => context[:verified_story_facts],
      "ownership_classification" => context[:story_ownership_classification],
      "generation_policy" => context[:generation_policy],
      "validated_story_insights" => context[:validated_story_insights],
      "ranked_candidates" => ranked.first(8).map { |text, value| { "comment" => text, "score" => value } },
      "selected_comment" => selected_comment,
      "selected_relevance_score" => score,
      "generated_at" => Time.current.iso8601,
      "processing_ms" => duration_ms,
      "pipeline" => "validated_story_intelligence_v3"
    )
  )

  # Final broadcasts: 100% progress, archive refresh signal, completion payload.
  broadcast_llm_comment_generation_progress(stage: "completed", message: "Comment ready.", progress: 100)
  broadcast_story_archive_refresh
  broadcast_llm_comment_generation_update(
    enhanced_result.merge(
      selected_comment: selected_comment,
      relevance_score: score,
      ranked_candidates: ranked.first(8)
    )
  )

  enhanced_result.merge(
    selected_comment: selected_comment,
    relevance_score: score,
    ranked_candidates: ranked.first(8)
  )
end
-
# Returns the stored reply comment from the event metadata, or nil when
# metadata is not a Hash (or has no such key).
def reply_comment
  return unless metadata.is_a?(Hash)

  metadata["reply_comment"]
end
-
# Model validation: a "completed" status requires comment text, timestamp and
# provider; a generated_at timestamp without comment text is also invalid.
def llm_comment_consistency
  completed = llm_comment_status.to_s == "completed"

  if completed
    errors.add(:llm_generated_comment, "must be present when status is completed") if llm_generated_comment.blank?
    errors.add(:llm_comment_generated_at, "must be present when status is completed") if llm_comment_generated_at.blank?
    errors.add(:llm_comment_provider, "must be present when status is completed") if llm_comment_provider.blank?
  end

  if llm_generated_comment.blank? && llm_comment_generated_at.present?
    errors.add(:llm_generated_comment, "must be present when generated_at is set")
  end
end
-
# Assembles the complete context hash consumed by generate_llm_comment!:
# verified story insights, the post payload fed to the model, historical
# comments/story context, voice profile, ownership classification and the
# generation policy. Pure read/aggregate — persistence happens in the caller.
#
# NOTE(review): validated_story_insights is deliberately reassigned mid-way by
# apply_historical_validation; ownership/policy must be read AFTER that point.
def build_comment_context
  profile = instagram_profile
  raw_metadata = metadata.is_a?(Hash) ? metadata : {}
  local_story_intelligence = local_story_intelligence_payload
  # First pass of verified insights from the builder (pre historical validation).
  validated_story_insights = Ai::VerifiedStoryInsightBuilder.new(
    profile: profile,
    local_story_intelligence: local_story_intelligence,
    metadata: raw_metadata
  ).build
  verified_story_facts = validated_story_insights[:verified_story_facts].is_a?(Hash) ? validated_story_insights[:verified_story_facts] : {}

  # Base payload: post descriptor, author profile hints, and hard generation rules.
  post_payload = {
    post: {
      event_id: id,
      media_type: raw_metadata["media_type"].to_s.presence || media&.blob&.content_type.to_s.presence || "unknown"
    },
    author_profile: {
      username: profile&.username,
      display_name: profile&.display_name,
      bio_keywords: extract_topics_from_profile(profile).first(10)
    },
    rules: {
      max_length: 140,
      require_local_pipeline: true,
      require_verified_story_facts: true,
      block_unverified_generation: true,
      verified_only: true
    }
  }

  # Verified facts take precedence over raw local intelligence for the description.
  image_description = build_story_image_description(local_story_intelligence: verified_story_facts.presence || local_story_intelligence)

  historical_comments = recent_llm_comments_for_profile(profile)
  topics = (Array(verified_story_facts[:topics]) + extract_topics_from_profile(profile)).map(&:to_s).reject(&:blank?).uniq.first(20)
  historical_story_context = recent_story_intelligence_context(profile)
  profile_preparation = latest_profile_comment_preparation(profile)
  verified_profile_history = recent_analyzed_profile_history(profile)
  conversational_voice = build_conversational_voice_profile(
    profile: profile,
    historical_story_context: historical_story_context,
    verified_profile_history: verified_profile_history,
    profile_preparation: profile_preparation
  )
  historical_comparison = build_historical_comparison(
    current: verified_story_facts.presence || local_story_intelligence,
    historical_story_context: historical_story_context
  )
  # Second pass: historical overlap may demote ownership / block commenting.
  validated_story_insights = apply_historical_validation(
    validated_story_insights: validated_story_insights,
    historical_comparison: historical_comparison
  )
  story_ownership_classification = validated_story_insights[:ownership_classification].is_a?(Hash) ? validated_story_insights[:ownership_classification] : {}
  generation_policy = validated_story_insights[:generation_policy].is_a?(Hash) ? validated_story_insights[:generation_policy] : {}
  cv_ocr_evidence = build_cv_ocr_evidence(local_story_intelligence: verified_story_facts.presence || local_story_intelligence)

  # Enrich the post payload with the derived evidence for the model prompt.
  post_payload[:historical_comparison] = historical_comparison
  post_payload[:cv_ocr_evidence] = cv_ocr_evidence
  post_payload[:story_ownership_classification] = story_ownership_classification
  post_payload[:generation_policy] = generation_policy
  post_payload[:profile_comment_preparation] = profile_preparation
  post_payload[:conversational_voice] = conversational_voice
  post_payload[:verified_profile_history] = verified_profile_history
  historical_context = build_compact_historical_context(
    profile: profile,
    historical_story_context: historical_story_context,
    verified_profile_history: verified_profile_history,
    profile_preparation: profile_preparation
  )

  {
    post_payload: post_payload,
    image_description: image_description,
    topics: topics,
    author_type: determine_author_type(profile),
    historical_comments: historical_comments,
    historical_context: historical_context,
    historical_story_context: historical_story_context,
    historical_comparison: historical_comparison,
    cv_ocr_evidence: cv_ocr_evidence,
    local_story_intelligence: local_story_intelligence,
    verified_story_facts: verified_story_facts,
    story_ownership_classification: story_ownership_classification,
    generation_policy: generation_policy,
    validated_story_insights: validated_story_insights,
    profile_preparation: profile_preparation,
    verified_profile_history: verified_profile_history,
    conversational_voice: conversational_voice
  }
end
-
-
end
-
end
-
require 'active_support/concern'
-
-
module InstagramProfileEvent::LocalStoryIntelligence
-
extend ActiveSupport::Concern
-
-
included do
-
# Raised when the local story-intelligence pipeline cannot supply usable
# facts, or when the verified story policy blocks comment generation.
# Carries machine-readable reason/source codes alongside the message.
class LocalStoryIntelligenceUnavailableError < StandardError
  attr_reader :reason, :source

  def initialize(message = nil, reason: nil, source: nil)
    # Blank strings normalize to nil so callers can rely on presence checks.
    @reason = reason.to_s.presence
    @source = source.to_s.presence
    super(message || "Local story intelligence unavailable")
  end
end
-
# Builds the unified local-intelligence payload for this event by merging, in
# precedence order: the latest story's "content_understanding" metadata, the
# event's embedded "local_story_intelligence", the story's metadata, and the
# event's raw metadata. When the structured fields are empty and media is
# attached, it falls back to live on-device extraction from the media bytes.
#
# @return [Hash] symbol-keyed payload (ocr_text, transcript, objects, hashtags,
#   mentions, profile_handles, topics, scenes, ocr_blocks, object_detections,
#   face_count, people, source, …). On any error a minimal payload with
#   source: "unavailable" is returned — this method never raises.
def local_story_intelligence_payload
  raw = metadata.is_a?(Hash) ? metadata : {}
  # NOTE(review): assumes instagram_stories is this event's story association
  # ordered most-recently-updated first — confirm against the model.
  story = instagram_stories.order(updated_at: :desc, id: :desc).first
  story_meta = story&.metadata.is_a?(Hash) ? story.metadata : {}
  story_embedded = story_meta["content_understanding"].is_a?(Hash) ? story_meta["content_understanding"] : {}
  event_embedded = raw["local_story_intelligence"].is_a?(Hash) ? raw["local_story_intelligence"] : {}
  # Story-level understanding wins over the event's embedded copy.
  embedded = story_embedded.presence || event_embedded.presence || {}

  # Scalar text fields: first non-blank value wins across the four sources.
  ocr_text = first_present(
    embedded["ocr_text"],
    event_embedded["ocr_text"],
    story_meta["ocr_text"],
    raw["ocr_text"]
  )
  transcript = first_present(
    embedded["transcript"],
    event_embedded["transcript"],
    story_meta["transcript"],
    raw["transcript"]
  )
  # List fields: union of all sources, deduped and capped (see merge_unique_values).
  objects = merge_unique_values(
    embedded["objects"],
    event_embedded["objects"],
    story_meta["content_signals"],
    raw["content_signals"]
  )
  hashtags = merge_unique_values(
    embedded["hashtags"],
    event_embedded["hashtags"],
    story_meta["hashtags"],
    raw["hashtags"]
  )
  mentions = merge_unique_values(
    embedded["mentions"],
    event_embedded["mentions"],
    story_meta["mentions"],
    raw["mentions"]
  )
  profile_handles = merge_unique_values(
    embedded["profile_handles"],
    event_embedded["profile_handles"],
    story_meta["profile_handles"],
    raw["profile_handles"]
  )
  scenes = normalize_hash_array(
    embedded["scenes"],
    event_embedded["scenes"],
    story_meta["scenes"],
    raw["scenes"]
  )
  ocr_blocks = normalize_hash_array(
    embedded["ocr_blocks"],
    event_embedded["ocr_blocks"],
    story_meta["ocr_blocks"],
    raw["ocr_blocks"]
  )
  # Reconstruct OCR text from individual blocks when no flat text was stored.
  ocr_text_from_blocks = ocr_blocks
    .map { |row| row.is_a?(Hash) ? (row["text"] || row[:text]) : nil }
    .map(&:to_s)
    .map(&:strip)
    .reject(&:blank?)
    .uniq
    .join("\n")
    .presence
  ocr_text = first_present(ocr_text, ocr_text_from_blocks)
  # Derive hashtags/mentions/handles from OCR text when sources provided none.
  if hashtags.empty? && ocr_text.to_s.present?
    hashtags = ocr_text.to_s.scan(/#[a-zA-Z0-9_]+/).map(&:downcase).uniq.first(20)
  end
  if mentions.empty? && ocr_text.to_s.present?
    mentions = ocr_text.to_s.scan(/@[a-zA-Z0-9._]+/).map(&:downcase).uniq.first(20)
  end
  if profile_handles.empty? && ocr_text.to_s.present?
    # Heuristic: handle-like tokens contain "_" or "."; domain noise filtered out.
    profile_handles = ocr_text.to_s.scan(/\b[a-zA-Z0-9._]{3,30}\b/)
      .map(&:downcase)
      .select { |token| token.include?("_") || token.include?(".") }
      .reject { |token| token.include?("instagram.com") }
      .uniq
      .first(30)
  end
  object_detections = normalize_hash_array(
    embedded["object_detections"],
    event_embedded["object_detections"],
    story_meta["object_detections"],
    raw["object_detections"]
  )
  # Fold detection labels into the flat objects list.
  detected_object_labels = object_detections
    .map { |row| row.is_a?(Hash) ? (row[:label] || row["label"] || row[:description] || row["description"]) : nil }
    .map(&:to_s)
    .map(&:strip)
    .reject(&:blank?)
  objects = merge_unique_values(objects, detected_object_labels)
  # Topics = explicit topics + object labels + bare hashtag words.
  topics = merge_unique_values(
    embedded["topics"],
    objects,
    hashtags.map { |tag| tag.to_s.delete_prefix("#") }
  )

  # People rows can live under many keys across sources; normalize them all.
  normalized_people = normalize_people_rows(
    event_embedded["people"],
    raw["face_people"],
    raw["people"],
    story_meta["face_people"],
    story_meta["participants"],
    story_meta.dig("face_identity", "participants"),
    raw["participants"],
    raw.dig("face_identity", "participants")
  )
  # Face count: whichever is larger — the stored counter or the people rows.
  computed_face_count = [
    (event_embedded["face_count"] || embedded["faces_count"] || raw["face_count"]).to_i,
    normalized_people.size
  ].max

  payload = {
    ocr_text: ocr_text.to_s.presence,
    transcript: transcript.to_s.presence,
    objects: objects,
    hashtags: hashtags,
    mentions: mentions,
    profile_handles: profile_handles,
    topics: topics,
    scenes: scenes.first(80),
    ocr_blocks: ocr_blocks.first(120),
    object_detections: normalize_object_detections(object_detections, limit: 120),
    face_count: computed_face_count,
    people: normalized_people.first(12),
    source_account_reference: extract_source_account_reference(raw: raw, story_meta: story_meta),
    source_profile_ids: extract_source_profile_ids_from_metadata(raw: raw, story_meta: story_meta),
    media_type: raw["media_type"].to_s.presence || story_meta["media_type"].to_s.presence || media&.blob&.content_type.to_s.presence,
    source: if story_embedded.present?
      "story_processing"
    elsif event_embedded.present?
      "event_local_pipeline"
    else
      "event_metadata"
    end
  }

  # Live enrichment pass 1: structured fields (detections/blocks/scenes) are
  # all empty but media is attached — extract them from the media bytes.
  needs_structured_enrichment =
    media.attached? &&
    Array(payload[:object_detections]).empty? &&
    Array(payload[:ocr_blocks]).empty? &&
    Array(payload[:scenes]).empty?

  if needs_structured_enrichment
    extracted = extract_live_local_intelligence_from_event_media(story_id: raw["story_id"].to_s.presence || id.to_s)
    if extracted.is_a?(Hash)
      merged_scenes = normalize_hash_array(payload[:scenes], extracted[:scenes]).first(80)
      merged_ocr_blocks = normalize_hash_array(payload[:ocr_blocks], extracted[:ocr_blocks]).first(120)
      merged_object_detections = normalize_object_detections(payload[:object_detections], extracted[:object_detections], limit: 120)

      payload[:scenes] = merged_scenes
      payload[:ocr_blocks] = merged_ocr_blocks
      payload[:object_detections] = merged_object_detections

      if merged_scenes.any? || merged_ocr_blocks.any? || merged_object_detections.any?
        payload[:source] = "live_local_enrichment"
      end
    end
  end

  # Live enrichment pass 2: whole payload still blank — replace it outright
  # with a live extraction, or at least carry over its failure reason.
  if local_story_intelligence_blank?(payload) && media.attached?
    extracted = extract_live_local_intelligence_from_event_media(story_id: raw["story_id"].to_s.presence || id.to_s)
    if extracted.is_a?(Hash)
      if !local_story_intelligence_blank?(extracted)
        payload = extracted
      elsif extracted[:reason].to_s.present?
        payload[:reason] = extracted[:reason].to_s
      end
    end
  end

  # Final marking: still blank → unavailable, with a reason code.
  if local_story_intelligence_blank?(payload)
    payload[:source] = "unavailable"
    payload[:reason] = payload[:reason].to_s.presence || "local_ai_extraction_empty"
  end

  payload
rescue StandardError
  # Never raise: callers gate generation on source/blankness instead.
  {
    ocr_text: nil,
    transcript: nil,
    objects: [],
    hashtags: [],
    mentions: [],
    profile_handles: [],
    topics: [],
    scenes: [],
    ocr_blocks: [],
    object_detections: [],
    source: "unavailable"
  }
end
-
# Persists a usable local-intelligence payload back onto the event's metadata
# (flat keys plus a consolidated "local_story_intelligence" hash) and, unless
# the story is excluded from the narrative, enqueues a history-narrative job.
#
# Skips entirely for non-Hash payloads or source blank/"unavailable".
# Uses update_columns (no validations/callbacks). Best-effort: all errors are
# swallowed so persistence never breaks the generation pipeline.
def persist_local_story_intelligence!(payload)
  return unless payload.is_a?(Hash)
  source = payload[:source].to_s
  return if source.blank? || source == "unavailable"

  current_meta = metadata.is_a?(Hash) ? metadata.deep_dup : {}
  # NOTE(review): current_intel is read but never used below — dead local.
  current_intel = current_meta["local_story_intelligence"].is_a?(Hash) ? current_meta["local_story_intelligence"] : {}

  # Flat top-level keys (capped per field).
  current_meta["ocr_text"] = payload[:ocr_text].to_s if payload[:ocr_text].present?
  current_meta["transcript"] = payload[:transcript].to_s if payload[:transcript].present?
  current_meta["content_signals"] = Array(payload[:objects]).map(&:to_s).reject(&:blank?).first(40)
  current_meta["hashtags"] = Array(payload[:hashtags]).map(&:to_s).reject(&:blank?).first(20)
  current_meta["mentions"] = Array(payload[:mentions]).map(&:to_s).reject(&:blank?).first(20)
  current_meta["profile_handles"] = Array(payload[:profile_handles]).map(&:to_s).reject(&:blank?).first(30)
  current_meta["topics"] = Array(payload[:topics]).map(&:to_s).reject(&:blank?).first(40)
  current_meta["scenes"] = normalize_hash_array(payload[:scenes]).first(80)
  current_meta["ocr_blocks"] = normalize_hash_array(payload[:ocr_blocks]).first(120)
  current_meta["object_detections"] = normalize_object_detections(payload[:object_detections], limit: 120)
  current_meta["face_count"] = payload[:face_count].to_i if payload[:face_count].to_i.positive?
  current_meta["face_people"] = Array(payload[:people]).first(12) if Array(payload[:people]).any?
  # Consolidated snapshot with capture timestamp.
  current_meta["local_story_intelligence"] = {
    "source" => source,
    "captured_at" => Time.current.iso8601,
    "ocr_text" => payload[:ocr_text].to_s.presence,
    "transcript" => payload[:transcript].to_s.presence,
    "objects" => Array(payload[:objects]).first(40),
    "hashtags" => Array(payload[:hashtags]).first(30),
    "mentions" => Array(payload[:mentions]).first(30),
    "profile_handles" => Array(payload[:profile_handles]).first(30),
    "topics" => Array(payload[:topics]).first(40),
    "scenes" => normalize_hash_array(payload[:scenes]).first(80),
    "ocr_blocks" => normalize_hash_array(payload[:ocr_blocks]).first(120),
    "object_detections" => normalize_object_detections(payload[:object_detections], limit: 120),
    "face_count" => payload[:face_count].to_i,
    "people" => Array(payload[:people]).first(12)
  }
  current_meta["local_story_intelligence_history_appended_at"] = Time.current.iso8601

  update_columns(metadata: current_meta, updated_at: Time.current)
  # Respect prior ownership/policy classification before touching the narrative.
  ownership = current_meta["story_ownership_classification"].is_a?(Hash) ? current_meta["story_ownership_classification"] : {}
  policy = current_meta["story_generation_policy"].is_a?(Hash) ? current_meta["story_generation_policy"] : {}
  return if story_excluded_from_narrative?(ownership: ownership, policy: policy)

  history_payload = payload.merge(description: build_story_image_description(local_story_intelligence: payload))
  AppendProfileHistoryNarrativeJob.perform_later(
    instagram_profile_event_id: id,
    mode: "story_intelligence",
    intelligence: history_payload
  )
rescue StandardError
  nil
end
-
# Persists validated story insights (verified facts, ownership classification,
# generation policy) into event metadata, deduplicated via a SHA-256 signature
# so identical insights are not rewritten. Also mirrors convenience keys
# (share_status, source profile refs, content_classification, exclusion
# flags) and enqueues a history-narrative job unless the story is excluded.
#
# Accepts both symbol- and string-keyed sub-hashes (metadata round-trips
# through JSON produce string keys). Best-effort: all errors are swallowed.
def persist_validated_story_insights!(payload)
  return unless payload.is_a?(Hash)
  verified_story_facts = payload[:verified_story_facts].is_a?(Hash) ? payload[:verified_story_facts] : {}
  ownership_classification = payload[:ownership_classification].is_a?(Hash) ? payload[:ownership_classification] : {}
  generation_policy = payload[:generation_policy].is_a?(Hash) ? payload[:generation_policy] : {}
  return if verified_story_facts.blank? && ownership_classification.blank? && generation_policy.blank?

  # Change-detection signature over the normalized evidence.
  signature_payload = {
    verified_story_facts: build_cv_ocr_evidence(local_story_intelligence: verified_story_facts),
    ownership_classification: ownership_classification,
    generation_policy: generation_policy
  }
  signature = Digest::SHA256.hexdigest(signature_payload.to_json)

  current_meta = metadata.is_a?(Hash) ? metadata.deep_dup : {}
  stored = current_meta["validated_story_insights"].is_a?(Hash) ? current_meta["validated_story_insights"] : {}
  # Unchanged insights → nothing to persist.
  return if stored["signature"].to_s == signature

  current_meta["validated_story_insights"] = {
    "signature" => signature,
    "validated_at" => Time.current.iso8601,
    "verified_story_facts" => verified_story_facts,
    "ownership_classification" => ownership_classification,
    "generation_policy" => generation_policy
  }
  current_meta["story_ownership_classification"] = ownership_classification
  current_meta["story_generation_policy"] = generation_policy
  current_meta["detected_external_usernames"] = Array(ownership_classification[:detected_external_usernames] || ownership_classification["detected_external_usernames"]).map(&:to_s).first(12)
  source_profile_references = Array(ownership_classification[:source_profile_references] || ownership_classification["source_profile_references"] || verified_story_facts[:source_profile_references] || verified_story_facts["source_profile_references"]).map(&:to_s).reject(&:blank?).first(20)
  source_profile_ids = Array(ownership_classification[:source_profile_ids] || ownership_classification["source_profile_ids"] || verified_story_facts[:source_profile_ids] || verified_story_facts["source_profile_ids"]).map(&:to_s).reject(&:blank?).first(20)
  share_status = (ownership_classification[:share_status] || ownership_classification["share_status"]).to_s.presence || "unknown"
  # key? check keeps an explicit false from falling through to the string key.
  allow_comment_value = if generation_policy.key?(:allow_comment)
    generation_policy[:allow_comment]
  else
    generation_policy["allow_comment"]
  end
  excluded_from_narrative = story_excluded_from_narrative?(ownership: ownership_classification, policy: generation_policy)
  current_meta["source_profile_references"] = source_profile_references
  current_meta["source_profile_ids"] = source_profile_ids
  current_meta["share_status"] = share_status
  current_meta["analysis_excluded"] = excluded_from_narrative
  current_meta["analysis_exclusion_reason"] = if excluded_from_narrative
    ownership_classification[:summary].to_s.presence || ownership_classification["summary"].to_s.presence || generation_policy[:reason].to_s.presence || generation_policy["reason"].to_s.presence
  end
  current_meta["content_classification"] = {
    "share_status" => share_status,
    "ownership_label" => ownership_classification[:label] || ownership_classification["label"],
    "allow_comment" => ActiveModel::Type::Boolean.new.cast(allow_comment_value),
    "source_profile_references" => source_profile_references,
    "source_profile_ids" => source_profile_ids
  }
  # update_columns skips validations/callbacks intentionally.
  update_columns(metadata: current_meta, updated_at: Time.current)

  return if excluded_from_narrative

  # Feed the narrative job with facts plus ownership/policy annotations.
  history_payload = verified_story_facts.merge(
    ownership_classification: ownership_classification[:label] || ownership_classification["label"],
    ownership_summary: ownership_classification[:summary] || ownership_classification["summary"],
    ownership_confidence: ownership_classification[:confidence] || ownership_classification["confidence"],
    ownership_reason_codes: Array(ownership_classification[:reason_codes] || ownership_classification["reason_codes"]).first(12),
    generation_policy: generation_policy,
    description: build_story_image_description(local_story_intelligence: verified_story_facts)
  )
  AppendProfileHistoryNarrativeJob.perform_later(
    instagram_profile_event_id: id,
    mode: "story_intelligence",
    intelligence: history_payload
  )
rescue StandardError
  nil
end
-
# Builds a compact human-readable description of a story from the local
# intelligence payload (detected objects, scene count, face count, OCR text,
# audio transcript, topics). Used as the "image description" fed to the
# comment generator and to history narratives.
#
# Backward-compatible generalization: the payload may be symbol-keyed (fresh
# pipeline output) or string-keyed (round-tripped through JSON metadata, as
# persist_validated_story_insights! stores it) — both are accepted now, and a
# nil/non-Hash payload no longer raises; it yields the generic fallback line.
#
# @param local_story_intelligence [Hash, nil] intelligence payload
# @return [String] one or more sentences; never empty
def build_story_image_description(local_story_intelligence:)
  intel = local_story_intelligence.is_a?(Hash) ? local_story_intelligence : {}
  # Key-indifferent reader: prefer the symbol key, fall back to the string key.
  fetch = ->(key) { intel.key?(key) ? intel[key] : intel[key.to_s] }

  signals = Array(fetch.call(:objects)).first(6)
  if signals.empty?
    # Fall back to detection labels when the flat objects list is empty.
    signals = Array(fetch.call(:object_detections))
      .map { |row| row.is_a?(Hash) ? (row[:label] || row["label"]) : nil }
      .map(&:to_s)
      .map(&:strip)
      .reject(&:empty?)
      .uniq
      .first(6)
  end
  ocr = fetch.call(:ocr_text).to_s.strip
  transcript = fetch.call(:transcript).to_s.strip
  topic_text = Array(fetch.call(:topics)).first(5).join(", ")
  scene_count = Array(fetch.call(:scenes)).length
  face_count = fetch.call(:face_count).to_i

  parts = []
  parts << "Detected visual signals: #{signals.join(', ')}." if signals.any?
  parts << "Detected scene transitions: #{scene_count}." if scene_count.positive?
  parts << "Detected faces: #{face_count}." if face_count.positive?
  parts << "OCR text: #{ocr}." unless ocr.empty?
  parts << "Audio transcript: #{transcript}." unless transcript.empty?
  parts << "Inferred topics: #{topic_text}." unless topic_text.empty?
  # Guaranteed non-empty output for downstream prompts.
  parts << "Story media context extracted from local AI pipeline." if parts.empty?
  parts.join(" ")
end
-
# Returns the first candidate whose string form is non-blank, stripped of
# surrounding whitespace; nil when every candidate is blank.
def first_present(*values)
  values.each do |candidate|
    stripped = candidate.to_s.strip
    next if stripped.blank?

    return stripped
  end
  nil
end
-
# Flattens any mix of scalars/arrays/nils into a deduplicated list of
# stripped, non-blank strings, capped at 40 entries.
def merge_unique_values(*values)
  normalized = values.flat_map { |value| Array(value) }.map { |entry| entry.to_s.strip }
  normalized.reject(&:blank?).uniq.first(40)
end
-
# Cross-checks the builder's insights against historical story context:
# content labeled "owned_by_profile" that shows external usernames but NO
# historical overlap is demoted to third-party content and commenting is
# blocked. Always records the overlap flag on the policy. On any error the
# original insights are returned untouched.
def apply_historical_validation(validated_story_insights:, historical_comparison:)
  insights = validated_story_insights.is_a?(Hash) ? validated_story_insights.deep_dup : {}
  ownership = insights[:ownership_classification].is_a?(Hash) ? insights[:ownership_classification] : {}
  policy = insights[:generation_policy].is_a?(Hash) ? insights[:generation_policy] : {}

  has_overlap = ActiveModel::Type::Boolean.new.cast(historical_comparison[:has_historical_overlap])
  external_usernames = Array(ownership[:detected_external_usernames]).map(&:to_s).reject(&:blank?)

  demote_to_third_party =
    ownership[:label].to_s == "owned_by_profile" && !has_overlap && external_usernames.any?

  if demote_to_third_party
    demotion_summary = "Detected external usernames without historical overlap; classified as third-party content."
    ownership[:label] = "third_party_content"
    ownership[:decision] = "skip_comment"
    ownership[:reason_codes] = Array(ownership[:reason_codes]) + [ "no_historical_overlap_with_external_usernames" ]
    ownership[:summary] = demotion_summary
    policy[:allow_comment] = false
    policy[:reason_code] = "no_historical_overlap_with_external_usernames"
    policy[:reason] = demotion_summary
    policy[:classification] = ownership[:label]
  end
  policy[:historical_overlap] = has_overlap

  insights[:ownership_classification] = ownership
  insights[:generation_policy] = policy
  insights
rescue StandardError
  validated_story_insights
end
-
# True when the local-AI payload carries no usable signal at all:
# no OCR/transcript text, no list-valued evidence, and no detected faces.
def local_story_intelligence_blank?(payload)
  return true unless payload.is_a?(Hash)

  list_keys = %i[
    objects object_detections ocr_blocks scenes hashtags
    mentions profile_handles topics people
  ]
  lists_empty = list_keys.all? { |key| Array(payload[key]).empty? }

  lists_empty &&
    payload[:ocr_text].to_s.strip.blank? &&
    payload[:transcript].to_s.strip.blank? &&
    payload[:face_count].to_i <= 0
end
-
# Routes the attached event media to the image or video local-AI pipeline
# based on its MIME type. Returns {} for blank/unknown types or any error.
# NOTE(review): relies on a bare `media` reader — presumably an ActiveStorage
# attachment on the enclosing object; confirm it is always in scope here.
def extract_live_local_intelligence_from_event_media(story_id:)
  mime_type = media&.blob&.content_type.to_s
  return {} if mime_type.blank?
  return extract_local_intelligence_from_image_bytes(image_bytes: media.download, story_id: story_id) if mime_type.start_with?("image/")
  return extract_local_intelligence_from_video_bytes(video_bytes: media.download, story_id: story_id, content_type: mime_type) if mime_type.start_with?("video/")

  {}
rescue StandardError
  # Extraction is best-effort; any failure degrades to "no intelligence".
  {}
end
-
# Runs the local image pipeline (face detection + content understanding +
# person resolution) over raw image bytes and condenses the result into a
# bounded intelligence hash tagged with source "live_local_vision_ocr".
#
# image_bytes - raw image payload handed to the detection services.
# story_id - identifier threaded through every service call (stringified).
#
# Returns a Hash of capped, de-duplicated evidence lists plus face/person
# data; nil-able text fields use .presence so empty strings become nil.
def extract_local_intelligence_from_image_bytes(image_bytes:, story_id:)
  detection = FaceDetectionService.new.detect(
    media_payload: { story_id: story_id.to_s, image_bytes: image_bytes }
  )
  # Single-image story: one detection, no transcript available.
  understanding = StoryContentUnderstandingService.new.build(
    media_type: "image",
    detections: [detection],
    transcript_text: nil
  )
  people = resolve_people_from_faces(detected_faces: Array(detection[:faces]), fallback_image_bytes: image_bytes, story_id: story_id)

  # Every list is normalized (to_s, blanks dropped, uniq) and capped to keep
  # the downstream prompt/context payload bounded.
  {
    ocr_text: understanding[:ocr_text].to_s.presence,
    transcript: understanding[:transcript].to_s.presence,
    objects: Array(understanding[:objects]).map(&:to_s).reject(&:blank?).uniq.first(30),
    hashtags: Array(understanding[:hashtags]).map(&:to_s).reject(&:blank?).uniq.first(20),
    mentions: Array(understanding[:mentions]).map(&:to_s).reject(&:blank?).uniq.first(20),
    profile_handles: Array(understanding[:profile_handles]).map(&:to_s).reject(&:blank?).uniq.first(30),
    topics: Array(understanding[:topics]).map(&:to_s).reject(&:blank?).uniq.first(30),
    scenes: Array(understanding[:scenes]).first(80),
    ocr_blocks: Array(understanding[:ocr_blocks]).first(120),
    object_detections: normalize_object_detections(understanding[:object_detections], limit: 120),
    face_count: Array(detection[:faces]).length,
    people: people,
    reason: detection.dig(:metadata, :reason).to_s.presence,
    source: "live_local_vision_ocr"
  }
end
-
# Runs the local video pipeline over raw video bytes: samples frames for
# face detection, extracts and transcribes audio, optionally enriches with
# the local AI microservice, then condenses everything into a bounded
# intelligence hash tagged "live_local_video_vision_ocr_transcript".
#
# video_bytes - raw video payload.
# story_id - identifier threaded through service calls (stringified).
# content_type - video MIME type forwarded to the extraction services.
def extract_local_intelligence_from_video_bytes(video_bytes:, story_id:, content_type:)
  frame_result = VideoFrameExtractionService.new.extract(
    video_bytes: video_bytes,
    story_id: story_id.to_s,
    content_type: content_type.to_s
  )
  detections = []
  faces = []

  # Face-detect at most 8 sampled frames; each face keeps its source frame
  # bytes so person resolution can crop/re-examine it later.
  Array(frame_result[:frames]).first(8).each do |frame|
    detection = FaceDetectionService.new.detect(
      media_payload: { story_id: story_id.to_s, image_bytes: frame[:image_bytes] }
    )
    detections << detection
    Array(detection[:faces]).each { |face| faces << face.merge(image_bytes: frame[:image_bytes]) }
  end

  audio_result = VideoAudioExtractionService.new.extract(
    video_bytes: video_bytes,
    story_id: story_id.to_s,
    content_type: content_type.to_s
  )
  transcript = SpeechTranscriptionService.new.transcribe(
    audio_bytes: audio_result[:audio_bytes],
    story_id: story_id.to_s
  )
  # Best-effort enrichment: the microservice may be down; an empty hash is an
  # acceptable degradation. (Inline rescue swallows StandardError only.)
  video_intel = Ai::LocalMicroserviceClient.new.analyze_video_story_intelligence!(
    video_bytes: video_bytes,
    sample_rate: 2,
    usage_context: { workflow: "story_processing", story_id: story_id.to_s }
  ) rescue {}

  understanding = StoryContentUnderstandingService.new.build(
    media_type: "video",
    detections: detections,
    transcript_text: transcript[:transcript]
  )

  people = resolve_people_from_faces(
    detected_faces: faces,
    fallback_image_bytes: faces.first&.dig(:image_bytes),
    story_id: story_id
  )

  # Lists are normalized and capped; scene/OCR/object evidence merges the
  # local understanding output with the microservice enrichment.
  {
    ocr_text: understanding[:ocr_text].to_s.presence,
    transcript: understanding[:transcript].to_s.presence,
    objects: Array(understanding[:objects]).map(&:to_s).reject(&:blank?).uniq.first(40),
    hashtags: Array(understanding[:hashtags]).map(&:to_s).reject(&:blank?).uniq.first(25),
    mentions: Array(understanding[:mentions]).map(&:to_s).reject(&:blank?).uniq.first(25),
    profile_handles: Array(understanding[:profile_handles]).map(&:to_s).reject(&:blank?).uniq.first(40),
    topics: Array(understanding[:topics]).map(&:to_s).reject(&:blank?).uniq.first(40),
    scenes: normalize_hash_array(understanding[:scenes], video_intel["scenes"]).first(80),
    ocr_blocks: normalize_hash_array(understanding[:ocr_blocks], video_intel["ocr_blocks"]).first(120),
    object_detections: normalize_object_detections(understanding[:object_detections], video_intel["object_detections"], limit: 120),
    face_count: faces.length,
    people: people,
    # Collect distinct skip/degradation reasons from each pipeline stage.
    reason: [ frame_result.dig(:metadata, :reason), audio_result.dig(:metadata, :reason), transcript.dig(:metadata, :reason) ]
      .map(&:to_s)
      .reject(&:blank?)
      .uniq
      .join(", ")
      .presence,
    source: "live_local_video_vision_ocr_transcript"
  }
end
-
-
end
-
end
-
# One ordered chunk of a profile's compiled history text. Chunking keeps
# large histories pageable (word_count/entry_count track chunk size).
class InstagramProfileHistoryChunk < ApplicationRecord
  belongs_to :instagram_account
  belongs_to :instagram_profile

  # sequence orders chunks within one profile's history.
  validates :sequence, presence: true
  validates :word_count, numericality: { greater_than_or_equal_to: 0 }
  validates :entry_count, numericality: { greater_than_or_equal_to: 0 }

  # Reading order (lowest sequence first); id breaks ties deterministically.
  scope :ordered, -> { order(:sequence, :id) }
  scope :recent_first, -> { order(sequence: :desc, id: :desc) }
end
-
# AI-derived snapshot of a profile (language, tone, scores), always tied to
# the AiAnalysis run that produced it. Owns the derived message strategy and
# the supporting signal evidence rows.
class InstagramProfileInsight < ApplicationRecord
  belongs_to :instagram_account
  belongs_to :instagram_profile
  belongs_to :ai_analysis

  has_one :instagram_profile_message_strategy, dependent: :destroy
  has_many :instagram_profile_signal_evidences, dependent: :destroy

  validates :last_refreshed_at, presence: true

  scope :recent_first, -> { order(created_at: :desc, id: :desc) }
end
-
# Per-profile messaging playbook (openers, templates, dos/donts) derived
# from one AI analysis and its parent insight.
class InstagramProfileMessageStrategy < ApplicationRecord
  belongs_to :instagram_account
  belongs_to :instagram_profile
  belongs_to :ai_analysis
  belongs_to :instagram_profile_insight

  scope :recent_first, -> { order(created_at: :desc, id: :desc) }
end
-
# A scraped post belonging to a tracked Instagram profile, with attached
# media and AI analysis results.
class InstagramProfilePost < ApplicationRecord
  belongs_to :instagram_account
  belongs_to :instagram_profile
  has_many :instagram_profile_post_comments, dependent: :destroy
  has_many :instagram_post_faces, dependent: :destroy
  has_many :ai_analyses, as: :analyzable, dependent: :destroy

  has_one_attached :media
  has_one_attached :preview_image

  validates :shortcode, presence: true

  scope :recent_first, lambda { order(taken_at: :desc, id: :desc) }
  scope :pending_ai, lambda { where(ai_status: "pending") }

  # Canonical Instagram URL for this post; prefers the stored permalink and
  # falls back to rebuilding it from the shortcode.
  def permalink_url
    stored = permalink.presence
    stored || "#{Instagram::Client::INSTAGRAM_BASE_URL}/p/#{shortcode}/"
  end

  # Most recent post-purpose AI analysis, or nil when none exists.
  def latest_analysis
    ai_analyses.where(purpose: "post").recent_first.first
  end
end
-
# A single comment captured on a profile's post.
class InstagramProfilePostComment < ApplicationRecord
  belongs_to :instagram_profile_post
  belongs_to :instagram_profile

  validates :body, presence: true

  scope :recent_first, -> { order(commented_at: :desc, id: :desc) }
end
-
# One piece of evidence (language, interest, avoidance, note) backing a
# profile insight, traceable to the AI analysis that produced it.
class InstagramProfileSignalEvidence < ApplicationRecord
  belongs_to :instagram_account
  belongs_to :instagram_profile
  belongs_to :ai_analysis
  belongs_to :instagram_profile_insight

  validates :signal_type, presence: true

  scope :recent_first, -> { order(created_at: :desc, id: :desc) }
end
-
# Join model between profiles and tags; each (profile, tag) pair is unique.
class InstagramProfileTagging < ApplicationRecord
  belongs_to :instagram_profile
  belongs_to :profile_tag

  validates :instagram_profile_id, uniqueness: { scope: :profile_tag_id }
end
-
-
# A captured Instagram story with its attached media and detected faces.
class InstagramStory < ApplicationRecord
  belongs_to :instagram_account
  belongs_to :instagram_profile
  belongs_to :source_event, class_name: "InstagramProfileEvent", optional: true

  has_many :instagram_story_faces, dependent: :destroy
  has_one_attached :media

  validates :story_id, presence: true
  validates :processing_status, presence: true

  scope :processed, lambda { where(processed: true) }
  scope :recent_first, lambda { order(taken_at: :desc, id: :desc) }

  # True when either the recorded media_type or the attachment's MIME type
  # marks this story as a video.
  def video?
    return true if media_type.to_s == "video"

    media&.content_type.to_s.start_with?("video/")
  end

  # Anything that is not a video is treated as an image.
  def image?
    video? == false
  end
end
-
# One detected face within a story, optionally linked to a resolved person.
class InstagramStoryFace < ApplicationRecord
  # Allowed face roles; must match InstagramStoryPerson::ROLES semantics.
  ROLES = %w[primary_user secondary_person unknown].freeze

  belongs_to :instagram_story
  belongs_to :instagram_story_person, optional: true

  validates :role, presence: true, inclusion: { in: ROLES }
end
-
# A recurring person recognized across a profile's stories/posts. Most
# derived state (feedback, merge links, confidence) lives in the metadata
# JSON and is surfaced through the accessor methods below.
class InstagramStoryPerson < ApplicationRecord
  ROLES = %w[primary_user secondary_person unknown].freeze
  INACTIVE_MATCHING_STATUSES = %w[incorrect irrelevant synthetic].freeze

  belongs_to :instagram_account
  belongs_to :instagram_profile

  has_many :instagram_story_faces, dependent: :nullify
  has_many :instagram_post_faces, dependent: :nullify

  validates :role, presence: true, inclusion: { in: ROLES }

  scope :recently_seen, lambda { order(last_seen_at: :desc, id: :desc) }

  # Human-readable name; falls back to a stable synthetic label.
  def display_label
    name = label.to_s.presence
    name.nil? ? "person_#{id}" : name
  end

  # metadata column guarded against non-Hash payloads.
  def metadata_hash
    return metadata if metadata.is_a?(Hash)

    {}
  end

  # User feedback sub-hash stored under metadata["user_feedback"].
  def feedback_metadata
    raw = metadata_hash["user_feedback"]
    raw.is_a?(Hash) ? raw : {}
  end

  # Verification status supplied by user feedback; defaults to "unverified".
  def real_person_status
    status = feedback_metadata["real_person_status"].to_s
    status.presence || "unverified"
  end

  # Id of the person record this one was merged into, or nil.
  def merged_into_person_id
    raw = metadata_hash["merged_into_person_id"]
    return nil if raw.blank?

    raw.to_i
  end

  def merged?
    !merged_into_person_id.nil?
  end

  # Merged or user-rejected people are excluded from face matching.
  def active_for_matching?
    return false if merged?

    INACTIVE_MATCHING_STATUSES.exclude?(real_person_status)
  end

  # Cached confidence in [0.0, 1.0]; 0.0 when never computed.
  def identity_confidence
    raw = metadata_hash["identity_confidence"]
    raw.nil? ? 0.0 : raw.to_f.clamp(0.0, 1.0)
  end

  # Recomputes and persists identity_confidence inside metadata without
  # running callbacks/validations. Returns the new score.
  def sync_identity_confidence!(timestamp: Time.current)
    refreshed = metadata_hash.deep_dup
    refreshed["identity_confidence"] = self.class.identity_confidence_score(
      appearance_count: appearance_count.to_i,
      role: role.to_s,
      metadata: refreshed
    )
    update_columns(metadata: refreshed, updated_at: timestamp)
    refreshed["identity_confidence"].to_f
  end

  # Heuristic confidence score: appearances ramp to 1.0 over 10 sightings,
  # boosted by primary-user role, user confirmation, and linked usernames;
  # heavily penalized by negative feedback. Clamped to [0, 1], 3 decimals.
  def self.identity_confidence_score(appearance_count:, role:, metadata:)
    count = appearance_count.to_i
    score = [ count / 10.0, 1.0 ].min
    score += 0.18 if role.to_s == "primary_user"

    meta = metadata.is_a?(Hash) ? metadata : {}
    feedback = meta["user_feedback"].is_a?(Hash) ? meta["user_feedback"] : {}
    status = feedback["real_person_status"].to_s
    case status
    when "confirmed_real_person" then score += 0.22
    when "likely_real_person" then score += 0.10
    end
    score -= 0.45 if INACTIVE_MATCHING_STATUSES.include?(status)

    linked_count = Array(meta["linked_usernames"]).reject(&:blank?).size
    score += [ linked_count * 0.03, 0.15 ].min

    score.clamp(0.0, 1.0).round(3)
  end
end
-
# A free-form tag applied to profiles (many-to-many via taggings).
class ProfileTag < ApplicationRecord
  has_many :instagram_profile_taggings, dependent: :destroy
  has_many :instagram_profiles, through: :instagram_profile_taggings

  validates :name, presence: true, uniqueness: { case_sensitive: false }

  # Canonicalize names so lookups and the uniqueness check behave
  # case- and whitespace-insensitively.
  before_validation { self.name = name.to_s.strip.downcase }
end
-
-
# One synchronization run for an account; free-form counters are stored as
# JSON text in stats_json and exposed through #stats / #stats=.
class SyncRun < ApplicationRecord
  belongs_to :instagram_account

  validates :kind, presence: true
  validates :status, presence: true

  # Parsed stats payload; {} when the column is blank or holds invalid JSON.
  def stats
    raw = stats_json
    return {} if raw.blank?

    JSON.parse(raw)
  rescue JSON::ParserError
    {}
  end

  # Serializes any #to_h-able value (nil becomes {}) into stats_json.
  def stats=(value)
    self.stats_json = value.to_h.to_json
  end
end
-
-
1
module Ai
  # Records every external AI API call into AiApiCall rows, enriching each
  # record with a thread-local context (set via .with_context) so nested
  # code can tag usage with account/workflow info without plumbing it
  # through every call site.
  class ApiUsageTracker
    THREAD_CONTEXT_KEY = :ai_api_usage_context

    class << self
      # Merges +context+ (nil values dropped) into the thread-local usage
      # context for the duration of the block, then restores the previous
      # context even when the block raises.
      def with_context(context = {})
        saved = current_context
        Thread.current[THREAD_CONTEXT_KEY] = saved.merge(context.to_h.compact)
        yield
      ensure
        Thread.current[THREAD_CONTEXT_KEY] = saved
      end

      # Current thread-local context Hash; {} when unset or malformed.
      def current_context
        stored = Thread.current[THREAD_CONTEXT_KEY]
        stored.is_a?(Hash) ? stored : {}
      end

      # Records a successful API call.
      def track_success(provider:, operation:, category:, started_at:, instagram_account_id: nil, http_status: nil, request_units: nil, input_tokens: nil, output_tokens: nil, total_tokens: nil, metadata: {})
        create_record(
          status: "succeeded",
          provider: provider,
          operation: operation,
          category: category,
          started_at: started_at,
          instagram_account_id: instagram_account_id,
          http_status: http_status,
          request_units: request_units,
          input_tokens: input_tokens,
          output_tokens: output_tokens,
          total_tokens: total_tokens,
          metadata: metadata
        )
      end

      # Records a failed API call along with its error message.
      def track_failure(provider:, operation:, category:, started_at:, error:, instagram_account_id: nil, http_status: nil, request_units: nil, metadata: {})
        create_record(
          status: "failed",
          provider: provider,
          operation: operation,
          category: category,
          started_at: started_at,
          instagram_account_id: instagram_account_id,
          http_status: http_status,
          request_units: request_units,
          metadata: metadata,
          error_message: error.to_s
        )
      end

      private

      # Persists one AiApiCall row. started_at is a monotonic-clock reading;
      # latency is derived from it. Persistence failures are logged, never
      # raised — usage tracking must not break the tracked call path.
      def create_record(provider:, operation:, category:, status:, started_at:, instagram_account_id:, http_status:, request_units:, input_tokens: nil, output_tokens: nil, total_tokens: nil, metadata: {}, error_message: nil)
        finished_at = Time.current
        elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at.to_f) * 1000.0).round
        ctx = current_context
        # Explicit account id wins; otherwise fall back to the context.
        resolved_account_id = integer_or_nil(instagram_account_id) || integer_or_nil(ctx[:instagram_account_id])

        AiApiCall.create!(
          instagram_account_id: resolved_account_id,
          provider: provider.to_s,
          operation: operation.to_s,
          category: normalize_category(category),
          status: status.to_s,
          http_status: integer_or_nil(http_status),
          latency_ms: elapsed_ms,
          request_units: integer_or_nil(request_units),
          input_tokens: integer_or_nil(input_tokens),
          output_tokens: integer_or_nil(output_tokens),
          total_tokens: integer_or_nil(total_tokens),
          error_message: error_message,
          occurred_at: finished_at,
          metadata: (metadata.to_h.compact.presence || {}).merge(ctx.except(:instagram_account_id))
        )
      rescue StandardError => e
        Rails.logger.warn("[Ai::ApiUsageTracker] failed to persist usage event: #{e.class}: #{e.message}")
      end

      # Maps unknown categories onto "other".
      def normalize_category(value)
        candidate = value.to_s.strip
        AiApiCall::CATEGORIES.include?(candidate) ? candidate : "other"
      end

      # Lenient integer coercion: nil for blank or uncoercible values.
      def integer_or_nil(value)
        return nil if value.blank?

        Integer(value)
      rescue StandardError
        nil
      end
    end
  end
end
-
module Ai
  # Lightweight lexical scorer that ranks candidate comments by topical
  # overlap, image-description relevance, and novelty versus prior comments.
  class CommentRelevanceScorer
    class << self
      # Scores every non-blank suggestion and returns [[comment, score], ...]
      # sorted best-first.
      def rank(suggestions:, image_description:, topics:, historical_comments: [])
        scored = Array(suggestions).filter_map do |candidate|
          text = candidate.to_s.strip
          next if text.blank?

          value = score(
            comment: text,
            image_description: image_description,
            topics: topics,
            historical_comments: historical_comments
          )
          [ text, value ]
        end

        scored.sort_by { |(_comment, value)| -value }
      end

      # Combines topic overlap (40%), image overlap (25%), novelty vs.
      # history (35%) and a length bonus/penalty into a [0, 1] score
      # rounded to 4 decimals. Empty-token comments score 0.0.
      def score(comment:, image_description:, topics:, historical_comments: [])
        comment_tokens = normalize_tokens(comment)
        return 0.0 if comment_tokens.empty?

        topic_tokens = normalize_tokens(Array(topics).join(" "))
        image_tokens = normalize_tokens(image_description)
        history_tokens = Array(historical_comments).flat_map { |entry| normalize_tokens(entry) }

        topic_overlap = overlap_ratio(comment_tokens, topic_tokens)
        image_overlap = overlap_ratio(comment_tokens, image_tokens)
        novelty = 1.0 - overlap_ratio(comment_tokens, history_tokens)

        # Reward comfortable comment lengths, penalize very long ones.
        length_bonus =
          case comment.length
          when 20..110 then 0.12
          when 141.. then -0.2
          else 0.0
          end

        raw = (topic_overlap * 0.4) + (image_overlap * 0.25) + (novelty * 0.35) + length_bonus
        raw.clamp(0.0, 1.0).round(4)
      end

      private

      # Fraction of +tokens+ also present in +other_tokens+, in [0, 1].
      def overlap_ratio(tokens, other_tokens)
        return 0.0 if tokens.empty? || other_tokens.empty?

        shared = (tokens & other_tokens).size
        (shared.to_f / tokens.size.to_f).clamp(0.0, 1.0)
      end

      # Lowercased alphanumeric tokens of length >= 3, de-duplicated.
      def normalize_tokens(value)
        value.to_s
          .downcase
          .gsub(/[^a-z0-9\s]/, " ")
          .split
          .select { |token| token.length >= 3 }
          .uniq
      end
    end
  end
end
-
module Ai
  # Persists structured AI analysis output into the relational insight
  # tables (profile insights, message strategies, signal evidence, post
  # insights/entities). All writes use create! so validation failures
  # surface to the caller.
  class InsightSync
    class << self
      # Materializes a profile-level analysis hash into an
      # InstagramProfileInsight, its message strategy, and evidence rows.
      # No-op unless the record's analyzable is an InstagramProfile.
      #
      # analysis_record - the AiAnalysis row that produced analysis_hash.
      # payload - scrape payload (bio, can_message, ...) used for heuristics.
      # analysis_hash - parsed model output (string keys).
      def sync_profile!(analysis_record:, payload:, analysis_hash:)
        profile = analysis_record.analyzable
        return unless profile.is_a?(InstagramProfile)

        # Language entries are hashes like {"language" => ..., "confidence" => ...}.
        languages = Array(analysis_hash["languages"]).filter_map do |l|
          next unless l.is_a?(Hash)
          l["language"].to_s.strip.presence
        end

        # First detected language is treated as primary.
        primary_language = languages.first
        secondary_languages = languages.drop(1)

        writing_style = analysis_hash["writing_style"].is_a?(Hash) ? analysis_hash["writing_style"] : {}
        likes = normalize_string_array(analysis_hash["likes"])
        dislikes = normalize_string_array(analysis_hash["dislikes"])

        insight = InstagramProfileInsight.create!(
          instagram_account: analysis_record.instagram_account,
          instagram_profile: profile,
          ai_analysis: analysis_record,
          summary: analysis_hash["summary"].to_s,
          primary_language: primary_language,
          secondary_languages: secondary_languages,
          tone: writing_style["tone"].to_s.presence,
          formality: writing_style["formality"].to_s.presence,
          emoji_usage: writing_style["emoji_usage"].to_s.presence,
          slang_level: writing_style["slang_level"].to_s.presence,
          engagement_style: infer_engagement_style(writing_style: writing_style),
          profile_type: infer_profile_type(profile: profile, payload: payload),
          messageability_score: infer_messageability_score(payload),
          last_refreshed_at: Time.current,
          raw_analysis: analysis_hash
        )

        # Derived messaging playbook; dos/donts blend likes/dislikes with
        # model-provided tokens and are capped at 10 each.
        InstagramProfileMessageStrategy.create!(
          instagram_account: analysis_record.instagram_account,
          instagram_profile: profile,
          ai_analysis: analysis_record,
          instagram_profile_insight: insight,
          opener_templates: normalize_string_array(analysis_hash["suggested_dm_openers"]),
          comment_templates: normalize_string_array(analysis_hash["suggested_comment_templates"]),
          dos: (likes + normalize_string_array(analysis_hash["personalization_tokens"])).uniq.first(10),
          donts: (dislikes + normalize_string_array(analysis_hash["no_go_zones"])).uniq.first(10),
          cta_style: infer_cta_style(analysis_hash),
          best_topics: likes.first(15),
          avoid_topics: dislikes.first(15)
        )

        create_profile_evidences!(
          insight: insight,
          analysis_record: analysis_record,
          analysis_hash: analysis_hash,
          likes: likes,
          dislikes: dislikes
        )
      end

      # Materializes a post-level analysis hash into an InstagramPostInsight
      # plus one InstagramPostEntity per topic/personalization token.
      # No-op unless the record's analyzable is an InstagramPost.
      def sync_post!(analysis_record:, analysis_hash:)
        post = analysis_record.analyzable
        return unless post.is_a?(InstagramPost)

        topics = normalize_string_array(analysis_hash["topics"])
        actions = normalize_string_array(analysis_hash["suggested_actions"])
        comments = normalize_string_array(analysis_hash["comment_suggestions"])

        post_insight = InstagramPostInsight.create!(
          instagram_account: analysis_record.instagram_account,
          instagram_post: post,
          ai_analysis: analysis_record,
          image_description: analysis_hash["image_description"].to_s.presence,
          relevant: to_bool(analysis_hash["relevant"]),
          author_type: analysis_hash["author_type"].to_s.presence,
          sentiment: analysis_hash["sentiment"].to_s.presence,
          topics: topics,
          suggested_actions: actions,
          comment_suggestions: comments,
          confidence: to_float(analysis_hash["confidence"]),
          evidence: analysis_hash["evidence"].to_s,
          # Fall back to overall confidence when no engagement score given.
          engagement_score: to_float(analysis_hash["engagement_score"]) || to_float(analysis_hash["confidence"]),
          recommended_next_action: analysis_hash["recommended_next_action"].to_s.presence || actions.first,
          raw_analysis: analysis_hash
        )

        # Entities distinguish topics from personalization tokens by
        # membership in the topics list.
        (topics + normalize_string_array(analysis_hash["personalization_tokens"])).uniq.each do |topic|
          InstagramPostEntity.create!(
            instagram_account: analysis_record.instagram_account,
            instagram_post: post,
            instagram_post_insight: post_insight,
            entity_type: topics.include?(topic) ? "topic" : "personalization_token",
            value: topic,
            confidence: to_float(analysis_hash["confidence"]),
            evidence_text: analysis_hash["evidence"].to_s,
            source_type: "ai_analysis",
            source_ref: analysis_record.id.to_s
          )
        end
      end

      private

      # Writes one evidence row per detected language, like, dislike, and
      # (if present) the model's confidence note.
      def create_profile_evidences!(insight:, analysis_record:, analysis_hash:, likes:, dislikes:)
        Array(analysis_hash["languages"]).each do |lang|
          next unless lang.is_a?(Hash)

          value = lang["language"].to_s.strip
          next if value.blank?

          InstagramProfileSignalEvidence.create!(
            instagram_account: analysis_record.instagram_account,
            instagram_profile: insight.instagram_profile,
            ai_analysis: analysis_record,
            instagram_profile_insight: insight,
            signal_type: "language",
            value: value,
            confidence: to_float(lang["confidence"]),
            evidence_text: lang["evidence"].to_s,
            source_type: "ai_analysis",
            source_ref: analysis_record.id.to_s,
            occurred_at: Time.current
          )
        end

        likes.each do |topic|
          InstagramProfileSignalEvidence.create!(
            instagram_account: analysis_record.instagram_account,
            instagram_profile: insight.instagram_profile,
            ai_analysis: analysis_record,
            instagram_profile_insight: insight,
            signal_type: "interest",
            value: topic,
            confidence: nil,
            evidence_text: "likes",
            source_type: "ai_analysis",
            source_ref: analysis_record.id.to_s,
            occurred_at: Time.current
          )
        end

        dislikes.each do |topic|
          InstagramProfileSignalEvidence.create!(
            instagram_account: analysis_record.instagram_account,
            instagram_profile: insight.instagram_profile,
            ai_analysis: analysis_record,
            instagram_profile_insight: insight,
            signal_type: "avoidance",
            value: topic,
            confidence: nil,
            evidence_text: "dislikes",
            source_type: "ai_analysis",
            source_ref: analysis_record.id.to_s,
            occurred_at: Time.current
          )
        end

        notes = analysis_hash["confidence_notes"].to_s.strip
        if notes.present?
          InstagramProfileSignalEvidence.create!(
            instagram_account: analysis_record.instagram_account,
            instagram_profile: insight.instagram_profile,
            ai_analysis: analysis_record,
            instagram_profile_insight: insight,
            signal_type: "confidence_note",
            value: nil,
            confidence: nil,
            evidence_text: notes,
            source_type: "ai_analysis",
            source_ref: analysis_record.id.to_s,
            occurred_at: Time.current
          )
        end
      end

      # Heuristic classification: explicit tags first, then bio keywords;
      # "unknown" when nothing matches.
      def infer_profile_type(profile:, payload:)
        tags = profile.profile_tags.pluck(:name)
        return "page" if tags.include?("page")
        return "personal" if tags.include?("personal_user") || tags.include?("friend")

        bio = payload[:bio].to_s.downcase
        return "business" if bio.match?(/\b(bookings|business|official|shop|store)\b/)

        "unknown"
      end

      # 0.8 when DMs are known open, 0.2 when known closed, 0.5 otherwise.
      def infer_messageability_score(payload)
        can_message = payload[:can_message]
        return 0.8 if can_message == true
        return 0.2 if can_message == false

        0.5
      end

      # Compact "tone/formality/emoji" descriptor, e.g. "casual/low/high".
      def infer_engagement_style(writing_style:)
        tone = writing_style["tone"].to_s
        formality = writing_style["formality"].to_s
        emoji = writing_style["emoji_usage"].to_s
        [tone, formality, emoji].reject(&:blank?).join("/").presence || "unknown"
      end

      # CTA style guessed from the first suggested DM opener.
      def infer_cta_style(analysis_hash)
        first = normalize_string_array(analysis_hash["suggested_dm_openers"]).first.to_s
        return "question_based" if first.include?("?")

        "soft"
      end

      # Stringifies, strips, and drops blank entries.
      def normalize_string_array(value)
        Array(value).filter_map { |v| v.to_s.strip.presence }
      end

      # Lenient float coercion; nil when uncoercible.
      def to_float(value)
        Float(value)
      rescue StandardError
        nil
      end

      # Rails-style boolean cast ("1", "true", etc.).
      def to_bool(value)
        ActiveModel::Type::Boolean.new.cast(value)
      end
    end
  end
end
-
require "json"
-
require "net/http"
-
-
module Ai
-
class LocalEngagementCommentGenerator
-
DEFAULT_MODEL = "mistral:7b".freeze
-
MIN_SUGGESTIONS = 3
-
MAX_SUGGESTIONS = 8
-
-
BLOCKED_TERMS = %w[].freeze
-
TRANSIENT_ERRORS = [
-
Net::OpenTimeout,
-
Net::ReadTimeout,
-
Errno::ECONNRESET,
-
Errno::ECONNREFUSED
-
].freeze
-
-
# ollama_client - client object responding to #generate.
# model - optional model name override; blank values fall back to
#   DEFAULT_MODEL.
def initialize(ollama_client:, model: nil)
  @ollama_client = ollama_client
  chosen_model = model.to_s.presence
  @model = chosen_model || DEFAULT_MODEL
end
-
-
# Generates public-safe comment suggestions for a story via the local LLM.
#
# Flow: (1) honor an explicit generation-policy block, (2) build a grounded
# prompt, (3) call Ollama, (4) retry once with a stricter prompt if too few
# safe suggestions came back, (5) fall back to canned comments when the
# model still under-delivers or raises a non-transient error.
#
# Returns a Hash with :model, :prompt, :raw, :source ("policy"/"ollama"/
# "fallback"), :status, :fallback_used, :error_message, :comment_suggestions.
# Transient network errors are re-raised so callers/jobs can retry.
def generate!(post_payload:, image_description:, topics:, author_type:, historical_comments: [], historical_context: nil, historical_story_context: [], local_story_intelligence: {}, historical_comparison: {}, cv_ocr_evidence: {}, verified_story_facts: {}, story_ownership_classification: {}, generation_policy: {}, profile_preparation: {}, verified_profile_history: [], conversational_voice: {}, **_extra)
  # Policy gate: an explicit allow_comment=false short-circuits generation.
  # Both symbol and string keys are honored.
  if generation_policy.is_a?(Hash) && generation_policy.key?(:allow_comment) && !ActiveModel::Type::Boolean.new.cast(generation_policy[:allow_comment] || generation_policy["allow_comment"])
    return {
      model: @model,
      prompt: nil,
      raw: {},
      source: "policy",
      status: "blocked_by_policy",
      fallback_used: false,
      error_message: generation_policy[:reason].to_s.presence || generation_policy["reason"].to_s.presence || "Generation blocked by verified story policy.",
      comment_suggestions: []
    }
  end

  prompt = build_prompt(
    post_payload: post_payload,
    image_description: image_description,
    topics: topics,
    author_type: author_type,
    historical_comments: historical_comments,
    historical_context: historical_context,
    historical_story_context: historical_story_context,
    local_story_intelligence: local_story_intelligence,
    historical_comparison: historical_comparison,
    cv_ocr_evidence: cv_ocr_evidence,
    verified_story_facts: verified_story_facts,
    story_ownership_classification: story_ownership_classification,
    generation_policy: generation_policy,
    profile_preparation: profile_preparation,
    verified_profile_history: verified_profile_history,
    conversational_voice: conversational_voice
  )

  resp = @ollama_client.generate(
    model: @model,
    prompt: prompt,
    temperature: 0.7,
    max_tokens: 300
  )

  suggestions = parse_comment_suggestions(resp)
  suggestions = filter_safe_comments(suggestions)

  # One stricter, lower-temperature retry when the first pass yielded
  # fewer than MIN_SUGGESTIONS usable comments.
  if suggestions.size < MIN_SUGGESTIONS
    retry_resp = @ollama_client.generate(
      model: @model,
      prompt: "#{prompt}\n\nReturn strict JSON only. Ensure 8 non-empty suggestions.",
      temperature: 0.4,
      max_tokens: 220
    )
    retry_suggestions = filter_safe_comments(parse_comment_suggestions(retry_resp))
    # Keep the retry output only if it actually met the minimum.
    suggestions = retry_suggestions if retry_suggestions.size >= MIN_SUGGESTIONS
  end

  # Still under the minimum: serve canned fallback comments instead.
  if suggestions.size < MIN_SUGGESTIONS
    fallback = fallback_comments(image_description: image_description, topics: topics).first(MAX_SUGGESTIONS)
    return {
      model: @model,
      prompt: prompt,
      raw: resp,
      source: "fallback",
      status: "fallback_used",
      fallback_used: true,
      error_message: "Generated suggestions were insufficient (#{suggestions.size}/#{MIN_SUGGESTIONS})",
      comment_suggestions: fallback
    }
  end

  {
    model: @model,
    prompt: prompt,
    raw: resp,
    source: "ollama",
    status: "ok",
    fallback_used: false,
    error_message: nil,
    comment_suggestions: suggestions.first(MAX_SUGGESTIONS)
  }
rescue *TRANSIENT_ERRORS
  # Network-level timeouts/refusals bubble up for the caller's retry logic.
  raise
rescue StandardError => e
  # Any other failure degrades to fallback comments rather than crashing.
  # NOTE: prompt may be nil here if build_prompt itself raised.
  {
    model: @model,
    prompt: prompt,
    raw: {},
    source: "fallback",
    status: "error_fallback",
    fallback_used: true,
    error_message: e.message.to_s,
    comment_suggestions: fallback_comments(image_description: image_description, topics: topics).first(MAX_SUGGESTIONS)
  }
end
-
-
private
-
-
# Assembles the grounded LLM prompt: compacts every evidence source into a
# bounded CONTEXT_JSON payload, then embeds it in a fixed instruction
# template demanding strict-JSON output of exactly 8 comments.
# Returns the prompt String.
def build_prompt(post_payload:, image_description:, topics:, author_type:, historical_comments:, historical_context:, historical_story_context:, local_story_intelligence:, historical_comparison:, cv_ocr_evidence:, verified_story_facts:, story_ownership_classification:, generation_policy:, profile_preparation: {}, verified_profile_history: [], conversational_voice: {})
  # Each compact_* helper truncates/caps its input so the prompt stays
  # within a predictable size budget.
  verified_story_facts = compact_verified_story_facts(
    verified_story_facts,
    local_story_intelligence: local_story_intelligence,
    cv_ocr_evidence: cv_ocr_evidence
  )
  story_ownership_classification = compact_story_ownership_classification(story_ownership_classification)
  generation_policy = compact_generation_policy(generation_policy)
  historical_comparison = compact_historical_comparison(historical_comparison)
  compact_story_history = compact_historical_story_context(historical_story_context)
  profile_summary = compact_author_profile(post_payload[:author_profile], author_type: author_type)
  profile_preparation = compact_profile_preparation(profile_preparation)
  verified_profile_history = compact_verified_profile_history(verified_profile_history)
  conversational_voice = compact_conversational_voice(conversational_voice)

  # The model is instructed to treat this hash as its only source of truth.
  context_json = {
    task: "instagram_story_comment_generation",
    output_contract: {
      format: "strict_json",
      count: 8,
      max_chars_per_comment: 140
    },
    profile: profile_summary,
    profile_preparation: profile_preparation,
    conversational_voice: conversational_voice,
    current_story: {
      image_description: truncate_text(image_description.to_s, max: 280),
      topics: Array(topics).map(&:to_s).reject(&:blank?).uniq.first(10),
      verified_story_facts: verified_story_facts,
      ownership: story_ownership_classification,
      generation_policy: generation_policy
    },
    historical_context: {
      comparison: historical_comparison,
      recent_story_patterns: compact_story_history,
      recent_profile_history: verified_profile_history,
      recent_comments: Array(historical_comments).map { |value| truncate_text(value.to_s, max: 110) }.reject(&:blank?).first(6),
      summary: truncate_text(historical_context.to_s, max: 280)
    }
  }

  # Fixed instruction template; the heredoc text is part of the model
  # contract and must not be edited casually.
  <<~PROMPT
    You are a production-grade Instagram engagement assistant.
    Generate concise comments from VERIFIED data only.

    Grounding rules:
    - treat CONTEXT_JSON as the only source of truth
    - never use URLs, IDs, or hidden metadata as evidence
    - do not infer facts not present in `verified_story_facts`
    - require `profile_preparation.ready_for_comment_generation` to be true for personalized comments
    - if `generation_policy.allow_comment` is false, return empty suggestions
    - if ownership is not `owned_by_profile`, keep output neutral and non-personal
    - if identity_verification.owner_likelihood is low, avoid user-specific assumptions
    - never fabricate OCR text, usernames, objects, scenes, or participants

    Writing rules:
    - natural, public-safe, short comments
    - max 140 chars each
    - vary openings and avoid duplicates
    - avoid explicit/adult language
    - avoid identity, age, gender, or sensitive-trait claims
    - reflect recurring themes and wording style from `historical_context` and `conversational_voice`

    Output STRICT JSON only:
    {
      "comment_suggestions": ["...", "...", "...", "...", "...", "...", "...", "..."]
    }

    Generate exactly 8 suggestions, each <= 140 characters.
    Keep at least 3 suggestions neutral-safe for public comments.
    Avoid repeating phrases from previous comments for the same profile.

    CONTEXT_JSON:
    #{JSON.pretty_generate(context_json)}
  PROMPT
end
-
-
# Keeps only comments containing none of the BLOCKED_TERMS
# (case-insensitive). With an empty block list, input passes through.
def filter_safe_comments(comments)
  candidates = Array(comments)
  return candidates if BLOCKED_TERMS.empty?

  candidates.select do |comment|
    normalized = comment.to_s.downcase
    BLOCKED_TERMS.none? { |term| normalized.include?(term) }
  end
end
-
-
# Collapses internal whitespace, strips, and enforces the 140-character
# comment cap. Returns nil for blank input.
#
# Fix: the previous implementation used byteslice(0, 140), which counts
# bytes rather than characters — multibyte text (emoji, accents) could be
# cut mid-character, producing an invalid-UTF-8 string and truncating well
# under the intended 140-character limit. A character slice matches the
# "max 140 chars" contract stated in the generation prompt.
def normalize_comment(value)
  text = value.to_s.gsub(/\s+/, " ").strip
  # Same semantics as ActiveSupport's String#blank? (covers non-ASCII
  # whitespace such as NBSP that strip does not remove).
  return nil if text.match?(/\A[[:space:]]*\z/)

  text[0, 140]
end
-
-
# Extracts normalized, de-duplicated "comment_suggestions" from an Ollama
# response payload. Returns [] when the payload is missing, not a Hash, or
# its "response" field is not valid JSON.
#
# Fix: replaces the inline `rescue nil` modifier (which silently swallowed
# every StandardError, including programming errors) with an explicit
# JSON::ParserError rescue plus a Hash guard — the same observable
# outcomes, without masking unrelated failures.
def parse_comment_suggestions(response_payload)
  raw = response_payload.is_a?(Hash) ? response_payload["response"] : nil
  parsed =
    begin
      JSON.parse(raw.to_s)
    rescue JSON::ParserError
      nil
    end
  Array(parsed&.dig("comment_suggestions")).map { |v| normalize_comment(v) }.compact.uniq
end
-
-
# Canned, public-safe comments used when the LLM under-delivers. The
# anchor phrase prefers the first present topic, then the first clause of
# the image description, then a generic "this post".
def fallback_comments(image_description:, topics:)
  topic_anchor = Array(topics).map(&:to_s).find(&:present?)
  description_anchor = image_description.to_s.split(/[,.]/).first.to_s.downcase
  anchor = topic_anchor || description_anchor
  anchor = "this post" if anchor.blank?

  [
    "Okay this is a whole vibe 🔥",
    "Not gonna lie, this #{anchor} moment is clean 👏",
    "Love the energy on this one ✨",
    "This is low-key so good, great post 🙌",
    "Major main-feed energy right here 😮💨",
    "Ate this one, no notes 💯",
    "This made me stop scrolling fr 👀",
    "Super solid post, keep these coming 🚀"
  ]
end
-
-
# Strips the value and truncates it to +max+ characters, appending "..."
# when truncation happened.
#
# Fix: the length check counted characters but the cut used
# byteslice(0, max) — for multibyte text this truncated far below +max+
# characters and could split a character mid-byte, yielding invalid UTF-8.
# A character slice makes the check and the cut consistent.
def truncate_text(value, max:)
  text = value.to_s.strip
  return text if text.length <= max

  "#{text[0, max]}..."
end
-
-
# Compacts the local story-intelligence payload (OCR, objects, faces, handles)
# into a bounded, prompt-friendly hash. Accepts symbol or string keys.
def compact_local_story_intelligence(payload)
  data = payload.is_a?(Hash) ? payload : {}
  pick = ->(key) { data[key] || data[key.to_s] }
  string_list = ->(key, limit) { Array(pick.call(key)).map(&:to_s).reject(&:blank?).first(limit) }

  {
    source: pick.call(:source),
    reason: pick.call(:reason),
    ocr_text: truncate_text(pick.call(:ocr_text), max: 600),
    transcript: truncate_text(pick.call(:transcript), max: 600),
    objects: string_list.call(:objects, 20),
    scenes: Array(pick.call(:scenes)).first(20),
    hashtags: string_list.call(:hashtags, 20),
    mentions: string_list.call(:mentions, 20),
    profile_handles: string_list.call(:profile_handles, 20),
    source_account_reference: pick.call(:source_account_reference).to_s.presence,
    source_profile_ids: string_list.call(:source_profile_ids, 10),
    media_type: pick.call(:media_type).to_s.presence,
    face_count: pick.call(:face_count).to_i,
    people: Array(pick.call(:people)).first(10),
    object_detections: Array(pick.call(:object_detections)).first(25),
    ocr_blocks: Array(pick.call(:ocr_blocks)).first(25)
  }.compact
end
-
-
# Compacts CV/OCR evidence into a bounded hash (same shape family as
# compact_local_story_intelligence, with OCR text/transcript at the end).
def compact_cv_ocr_evidence(payload)
  data = payload.is_a?(Hash) ? payload : {}
  pick = ->(key) { data[key] || data[key.to_s] }
  string_list = ->(key, limit) { Array(pick.call(key)).map(&:to_s).reject(&:blank?).first(limit) }

  {
    source: pick.call(:source),
    reason: pick.call(:reason),
    objects: string_list.call(:objects, 20),
    scenes: Array(pick.call(:scenes)).first(20),
    hashtags: string_list.call(:hashtags, 20),
    mentions: string_list.call(:mentions, 20),
    profile_handles: string_list.call(:profile_handles, 20),
    source_account_reference: pick.call(:source_account_reference).to_s.presence,
    source_profile_ids: string_list.call(:source_profile_ids, 10),
    media_type: pick.call(:media_type).to_s.presence,
    face_count: pick.call(:face_count).to_i,
    people: Array(pick.call(:people)).first(10),
    object_detections: Array(pick.call(:object_detections)).first(25),
    ocr_blocks: Array(pick.call(:ocr_blocks)).first(25),
    ocr_text: truncate_text(pick.call(:ocr_text), max: 600),
    transcript: truncate_text(pick.call(:transcript), max: 600)
  }.compact
end
-
-
# Compacts the historical comparison payload: each list capped at 12 entries,
# plus a boolean overlap flag. Accepts symbol or string keys.
def compact_historical_comparison(payload)
  data = payload.is_a?(Hash) ? payload : {}
  pick = ->(key) { data[key] || data[key.to_s] }
  capped = ->(key) { Array(pick.call(key)).first(12) }

  {
    shared_topics: capped.call(:shared_topics),
    novel_topics: capped.call(:novel_topics),
    shared_objects: capped.call(:shared_objects),
    novel_objects: capped.call(:novel_objects),
    shared_scenes: capped.call(:shared_scenes),
    novel_scenes: capped.call(:novel_scenes),
    recurring_hashtags: capped.call(:recurring_hashtags),
    recurring_mentions: capped.call(:recurring_mentions),
    recurring_people_ids: capped.call(:recurring_people_ids),
    has_historical_overlap: ActiveModel::Type::Boolean.new.cast(pick.call(:has_historical_overlap))
  }
end
-
-
# Builds the canonical "verified facts" block for the prompt. When no verified
# payload exists, falls back to the CV/OCR evidence with a signal score of 0.
# (local_story_intelligence is accepted for interface parity but unused here.)
def compact_verified_story_facts(payload, local_story_intelligence:, cv_ocr_evidence:)
  data = payload.is_a?(Hash) ? payload : {}
  if data.blank?
    data = compact_cv_ocr_evidence(cv_ocr_evidence)
    data[:signal_score] = 0 unless data.key?(:signal_score)
  end

  pick = ->(key) { data[key] || data[key.to_s] }
  string_list = ->(key, limit) { Array(pick.call(key)).map(&:to_s).reject(&:blank?).first(limit) }

  {
    source: pick.call(:source),
    reason: pick.call(:reason),
    signal_score: pick.call(:signal_score).to_i,
    ocr_text: truncate_text(pick.call(:ocr_text), max: 320),
    transcript: truncate_text(pick.call(:transcript), max: 320),
    objects: string_list.call(:objects, 15),
    object_detections: compact_object_detections(pick.call(:object_detections)),
    scenes: compact_scenes(pick.call(:scenes)),
    hashtags: string_list.call(:hashtags, 15),
    mentions: string_list.call(:mentions, 15),
    profile_handles: string_list.call(:profile_handles, 15),
    detected_usernames: string_list.call(:detected_usernames, 15),
    source_profile_references: string_list.call(:source_profile_references, 15),
    share_status: pick.call(:share_status).to_s.presence,
    meme_markers: string_list.call(:meme_markers, 10),
    face_count: pick.call(:face_count).to_i,
    faces: compact_faces_payload(pick.call(:faces)),
    identity_verification: compact_identity_verification(pick.call(:identity_verification))
  }
end
-
-
# Compacts the ownership classification into a bounded hash for the prompt.
def compact_story_ownership_classification(payload)
  data = payload.is_a?(Hash) ? payload : {}
  pick = ->(key) { data[key] || data[key.to_s] }

  {
    label: pick.call(:label),
    decision: pick.call(:decision),
    confidence: pick.call(:confidence).to_f,
    summary: truncate_text(pick.call(:summary), max: 220),
    reason_codes: Array(pick.call(:reason_codes)).first(10),
    detected_external_usernames: Array(pick.call(:detected_external_usernames)).first(10),
    source_profile_references: Array(pick.call(:source_profile_references)).first(10),
    share_status: pick.call(:share_status)
  }
end
-
-
# Compacts the generation policy decision into a bounded hash.
def compact_generation_policy(payload)
  data = payload.is_a?(Hash) ? payload : {}
  pick = ->(key) { data[key] || data[key.to_s] }
  boolean = ActiveModel::Type::Boolean.new

  # key? check (rather than ||) so an explicit `allow_comment: false` stored
  # under the symbol key is not shadowed by a string-keyed value.
  allow_comment_value = data.key?(:allow_comment) ? data[:allow_comment] : data["allow_comment"]

  {
    allow_comment: boolean.cast(allow_comment_value),
    reason_code: pick.call(:reason_code),
    reason: truncate_text(pick.call(:reason), max: 220),
    classification: pick.call(:classification),
    signal_score: pick.call(:signal_score).to_i,
    historical_overlap: boolean.cast(pick.call(:historical_overlap))
  }
end
-
-
# Compacts the profile-preparation status, including nested analysis and
# identity-consistency sub-hashes (symbol or string keys accepted).
def compact_profile_preparation(payload)
  data = payload.is_a?(Hash) ? payload : {}
  nested = ->(key) { [data[key], data[key.to_s]].find { |candidate| candidate.is_a?(Hash) } || {} }
  identity = nested.call(:identity_consistency)
  analysis = nested.call(:analysis)
  pick = ->(source, key) { source[key] || source[key.to_s] }
  boolean = ActiveModel::Type::Boolean.new

  {
    ready_for_comment_generation: boolean.cast(pick.call(data, :ready_for_comment_generation)),
    reason_code: pick.call(data, :reason_code),
    reason: truncate_text(pick.call(data, :reason), max: 220),
    prepared_at: pick.call(data, :prepared_at),
    analyzed_posts_count: pick.call(analysis, :analyzed_posts_count).to_i,
    posts_with_structured_signals_count: pick.call(analysis, :posts_with_structured_signals_count).to_i,
    latest_posts_analyzed: boolean.cast(pick.call(analysis, :latest_posts_analyzed)),
    identity_consistency: {
      consistent: boolean.cast(pick.call(identity, :consistent)),
      reason_code: pick.call(identity, :reason_code),
      dominance_ratio: pick.call(identity, :dominance_ratio).to_f,
      appearance_count: pick.call(identity, :appearance_count).to_i,
      total_faces: pick.call(identity, :total_faces).to_i
    }
  }
end
-
-
# Compacts up to 10 verified historical posts into bounded summary rows.
def compact_verified_profile_history(rows)
  Array(rows).first(10).map do |row|
    data = row.is_a?(Hash) ? row : {}
    pick = ->(key) { data[key] || data[key.to_s] }

    {
      shortcode: pick.call(:shortcode),
      taken_at: pick.call(:taken_at),
      topics: Array(pick.call(:topics)).first(6),
      objects: Array(pick.call(:objects)).first(6),
      hashtags: Array(pick.call(:hashtags)).first(6),
      mentions: Array(pick.call(:mentions)).first(6),
      face_count: pick.call(:face_count).to_i,
      primary_face_count: pick.call(:primary_face_count).to_i,
      secondary_face_count: pick.call(:secondary_face_count).to_i,
      image_description: truncate_text(pick.call(:image_description), max: 180)
    }
  end
end
-
-
# Compacts the profile's conversational-voice signals (tags, keywords, prior
# comment examples) into a bounded hash; nil entries are dropped.
def compact_conversational_voice(payload)
  data = payload.is_a?(Hash) ? payload : {}
  pick = ->(key) { data[key] || data[key.to_s] }

  {
    author_type: pick.call(:author_type),
    profile_tags: Array(pick.call(:profile_tags)).first(10),
    bio_keywords: Array(pick.call(:bio_keywords)).first(10),
    recurring_topics: Array(pick.call(:recurring_topics)).first(12),
    recurring_hashtags: Array(pick.call(:recurring_hashtags)).first(10),
    frequent_people_labels: Array(pick.call(:frequent_people_labels)).first(8),
    prior_comment_examples: Array(pick.call(:prior_comment_examples)).map { |value| truncate_text(value, max: 100) }.first(6)
  }.compact
end
-
-
# Compacts up to 6 recent historical story rows (from at most 12 inspected),
# dropping entries older than 45 days. Rows with unparseable timestamps are
# kept, matching the original behavior.
def compact_historical_story_context(rows)
  cutoff = 45.days.ago
  Array(rows).first(12).filter_map do |row|
    data = row.is_a?(Hash) ? row : {}
    pick = ->(key) { data[key] || data[key.to_s] }
    occurred_at = parse_time(pick.call(:occurred_at))
    next if occurred_at && occurred_at < cutoff

    {
      occurred_at: occurred_at&.iso8601,
      topics: Array(pick.call(:topics)).first(6),
      objects: Array(pick.call(:objects)).first(6),
      hashtags: Array(pick.call(:hashtags)).first(6),
      mentions: Array(pick.call(:mentions)).first(6),
      profile_handles: Array(pick.call(:profile_handles)).first(6),
      recurring_people_ids: Array(pick.call(:people)).map { |person| person.is_a?(Hash) ? (person[:person_id] || person["person_id"]) : nil }.compact.first(4),
      face_count: pick.call(:face_count).to_i
    }
  end.first(6)
end
-
-
# Compacts the author's profile into the minimal fields used by the prompt.
def compact_author_profile(payload, author_type:)
  data = payload.is_a?(Hash) ? payload : {}
  pick = ->(key) { data[key] || data[key.to_s] }

  {
    username: pick.call(:username),
    display_name: truncate_text(pick.call(:display_name), max: 80),
    author_type: author_type.to_s.presence || "unknown",
    bio_keywords: Array(pick.call(:bio_keywords)).map(&:to_s).reject(&:blank?).first(10)
  }
end
-
-
# Compacts identity verification signals into a bounded hash with normalized
# boolean/float types.
def compact_identity_verification(payload)
  data = payload.is_a?(Hash) ? payload : {}
  pick = ->(key) { data[key] || data[key.to_s] }
  boolean = ActiveModel::Type::Boolean.new

  {
    owner_likelihood: pick.call(:owner_likelihood),
    confidence: pick.call(:confidence).to_f,
    primary_person_present: boolean.cast(pick.call(:primary_person_present)),
    recurring_primary_person: boolean.cast(pick.call(:recurring_primary_person)),
    bio_topic_overlap: Array(pick.call(:bio_topic_overlap)).first(8),
    age_consistency: pick.call(:age_consistency),
    gender_consistency: pick.call(:gender_consistency),
    reason_codes: Array(pick.call(:reason_codes)).first(10)
  }
end
-
-
# Compacts the faces payload: role counts plus at most 8 per-person rows,
# each with nil fields dropped.
def compact_faces_payload(payload)
  data = payload.is_a?(Hash) ? payload : {}
  pick = ->(source, key) { source[key] || source[key.to_s] }

  {
    total_count: pick.call(data, :total_count),
    primary_user_count: pick.call(data, :primary_user_count),
    secondary_person_count: pick.call(data, :secondary_person_count),
    unknown_count: pick.call(data, :unknown_count),
    people: Array(pick.call(data, :people)).map do |row|
      person = row.is_a?(Hash) ? row : {}
      {
        person_id: pick.call(person, :person_id),
        role: pick.call(person, :role),
        label: pick.call(person, :label),
        similarity: pick.call(person, :similarity).to_f,
        age_range: pick.call(person, :age_range),
        gender: pick.call(person, :gender)
      }.compact
    end.first(8)
  }
end
-
-
# Normalizes object detections to lowercase {label:, confidence:} rows,
# deduped and capped at 8. Rows without a label are dropped.
def compact_object_detections(rows)
  detections = Array(rows).filter_map do |entry|
    attrs = entry.is_a?(Hash) ? entry : {}
    name = (attrs[:label] || attrs["label"]).to_s.strip
    next if name.blank?

    raw_confidence = attrs[:confidence] || attrs["confidence"] || attrs[:score] || attrs["score"]
    { label: name.downcase, confidence: raw_confidence.to_f.round(3) }
  end
  detections.uniq.first(8)
end
-
-
# Normalizes scene rows to lowercase {type:, timestamp:} pairs, deduped and
# capped at 8. Rows without a type are dropped.
def compact_scenes(rows)
  normalized = Array(rows).filter_map do |entry|
    attrs = entry.is_a?(Hash) ? entry : {}
    kind = (attrs[:type] || attrs["type"]).to_s.strip
    next if kind.blank?

    { type: kind.downcase, timestamp: (attrs[:timestamp] || attrs["timestamp"]).to_f.round(2) }
  end
  normalized.uniq.first(8)
end
-
-
# Parses a timestamp string in the app's time zone.
# Returns nil on blank or unparseable input.
def parse_time(value)
  text = value.to_s
  return nil if text.blank?

  Time.zone.parse(text)
rescue StandardError
  nil
end
-
end
-
end
-
require "net/http"
-
require "json"
-
require "base64"
-
require "tempfile"
-
require "securerandom"
-
-
module Ai
-
class LocalMicroserviceClient
-
BASE_URL = ENV.fetch("LOCAL_AI_SERVICE_URL", "http://localhost:8000").freeze
-
HTTP_OPEN_TIMEOUT_SECONDS = ENV.fetch("LOCAL_AI_HTTP_OPEN_TIMEOUT_SECONDS", 20).to_i.clamp(3, 120)
-
HTTP_READ_TIMEOUT_SECONDS = ENV.fetch("LOCAL_AI_HTTP_READ_TIMEOUT_SECONDS", 120).to_i.clamp(10, 600)
-
MAX_IMAGE_UPLOAD_BYTES = ENV.fetch("LOCAL_AI_MAX_IMAGE_UPLOAD_BYTES", 20 * 1024 * 1024).to_i
-
MAX_VIDEO_UPLOAD_BYTES = ENV.fetch("LOCAL_AI_MAX_VIDEO_UPLOAD_BYTES", 80 * 1024 * 1024).to_i
-
MIN_IMAGE_UPLOAD_BYTES = ENV.fetch("LOCAL_AI_MIN_IMAGE_UPLOAD_BYTES", 128).to_i
-
MIN_VIDEO_UPLOAD_BYTES = ENV.fetch("LOCAL_AI_MIN_VIDEO_UPLOAD_BYTES", 1024).to_i
-
-
# @param service_url [String, nil] optional override for the microservice
#   base URL; defaults to BASE_URL (LOCAL_AI_SERVICE_URL env var).
def initialize(service_url: nil)
  @base_url = service_url || BASE_URL
end
-
-
# Probes the service's /health endpoint.
# @return [Hash] { ok:, message:, services: } — never raises; failures are
#   reported as { ok: false, message: ... }.
def test_connection!
  health = get_json("/health")
  raise "Local AI service unavailable" unless health["status"] == "healthy"

  {
    ok: true,
    message: "Local AI service is healthy",
    services: health["services"]
  }
rescue StandardError => e
  { ok: false, message: e.message.to_s }
end
-
-
# Runs Vision-style analysis of raw image bytes via the local microservice.
#
# @param bytes [String] raw image bytes (coerced to binary)
# @param features [Array] Google Vision feature names (strings or {type:} hashes)
# @param usage_category [String] accounting tag (accepted for interface parity)
# @param usage_context [Object] accounting context (accepted for interface parity)
# @return [Hash] response converted to the Google Vision annotation format
def analyze_image_bytes!(bytes, features:, usage_category: "image_analysis", usage_context: nil)
  bytes_data = bytes.to_s.b
  validate_image_bytes!(bytes_data)

  # Translate Google Vision feature names to the local service's vocabulary.
  service_features = convert_features(features)

  # Tempfile.create's block form closes AND deletes the file even on error,
  # replacing the manual begin/ensure close/unlink boilerplate.
  Tempfile.create(["image_analysis", ".jpg"]) do |temp_file|
    temp_file.binmode
    temp_file.write(bytes_data)
    temp_file.flush

    response = upload_file("/analyze/image", temp_file.path, { features: service_features.join(",") })

    # Normalize to the Google Vision response shape callers expect.
    convert_vision_response(response)
  end
end
-
-
# Downloads an image from +url+ and analyzes it via analyze_image_bytes!.
# Raises when the download does not return an HTTP success.
def analyze_image_uri!(url, features:, usage_category: "image_analysis", usage_context: nil)
  uri = URI.parse(url)
  client = Net::HTTP.new(uri.host, uri.port)
  client.use_ssl = (uri.scheme == "https")
  client.open_timeout = HTTP_OPEN_TIMEOUT_SECONDS
  # Cap the download read timeout at 90s regardless of the configured maximum.
  client.read_timeout = [HTTP_READ_TIMEOUT_SECONDS, 90].min

  download = client.get(uri.request_uri)
  raise "Failed to download image: #{download.code}" unless download.is_a?(Net::HTTPSuccess)

  analyze_image_bytes!(download.body, features: features, usage_category: usage_category, usage_context: usage_context)
end
-
-
# Runs Video-Intelligence-style analysis of raw video bytes via the local
# microservice.
#
# @param bytes [String] raw video bytes (coerced to binary)
# @param features [Array] Google Video Intelligence feature names
# @param usage_context [Object] accounting context (accepted for interface parity)
# @return [Hash] response converted to the Google Video Intelligence format
def analyze_video_bytes!(bytes, features:, usage_context: nil)
  bytes_data = bytes.to_s.b
  validate_video_bytes!(bytes_data)
  service_features = convert_video_features(features)

  # Tempfile.create's block form closes AND deletes the file even on error,
  # replacing the manual begin/ensure close/unlink boilerplate.
  Tempfile.create(["video_analysis", ".mp4"]) do |temp_file|
    temp_file.binmode
    temp_file.write(bytes_data)
    temp_file.flush

    response = upload_file("/analyze/video", temp_file.path, {
      features: service_features.join(","),
      sample_rate: 2 # sample one frame every 2 seconds
    })

    convert_video_response(response)
  end
end
-
-
# The local microservice processes videos synchronously, so any operation
# looked up by name is already complete; return a finished, empty result in
# the Google long-running-operation shape.
def fetch_video_operation!(name, usage_context: nil)
  {
    "done" => true,
    "response" => { "annotationResults" => [{}] }
  }
end
-
-
# Generates text via the local Ollama instance and attempts to parse the
# completion as JSON.
#
# @return [Hash] { raw:, text:, json: (nil when not valid JSON), usage: }
#
# Fix: the inline `JSON.parse(...) rescue nil` modifier swallowed every
# StandardError; the rescue is now scoped to the parse and to JSON failures
# (nil-safety via to_s). Token counts are read once instead of via repeated
# single-key #dig calls.
def generate_text_json!(model:, prompt:, temperature: 0.8, max_output_tokens: 900, usage_category: "text_generation", usage_context: nil)
  response = Ai::OllamaClient.new.generate(
    model: model,
    prompt: prompt,
    temperature: temperature,
    max_tokens: max_output_tokens
  )

  parsed =
    begin
      JSON.parse(response["response"].to_s)
    rescue JSON::ParserError
      nil
    end

  input_tokens = response["prompt_eval_count"] || 0
  output_tokens = response["eval_count"] || 0

  {
    raw: response,
    text: response["response"],
    json: parsed,
    usage: {
      input_tokens: input_tokens,
      output_tokens: output_tokens,
      total_tokens: input_tokens + output_tokens
    }
  }
end
-
-
# Returns normalized payload for local story intelligence extraction.
-
# Expected keys:
-
# - faces: [{ confidence:, bounding_box:, landmarks:, likelihoods: {} }]
-
# - ocr_text: "..."
-
# - ocr_blocks: [{ text:, confidence:, bbox:, source: }]
-
# - content_labels: ["person", "beach", ...]
-
# - object_detections: [{ label:, confidence:, bbox: }]
-
# - location_tags: []
-
# - mentions: ["@user"]
-
# - hashtags: ["#tag"]
-
# Uploads an image and extracts story intelligence: faces, OCR text/blocks,
# object/label detections, and handles/mentions/hashtags mined from the OCR.
# If the full "labels,text,faces" request fails, retries without OCR and
# records the failure as a warning in the returned metadata.
def detect_faces_and_ocr!(image_bytes:, usage_context: nil)
  bytes_data = image_bytes.to_s.b
  validate_image_bytes!(bytes_data)

  temp_file = Tempfile.new(["story_intel", ".jpg"])
  begin
    temp_file.binmode
    temp_file.write(bytes_data)
    temp_file.flush

    ocr_warning = nil
    begin
      # Preferred path: labels + OCR text + faces in one request.
      response = upload_file("/analyze/image", temp_file.path, { features: "labels,text,faces" })
      payload, results = unpack_response_payload!(
        response: response,
        operation: "detect_faces_and_ocr",
        expected_keys: %w[labels text faces]
      )
    rescue StandardError => e
      # OCR path failed — remember why, then fall back to labels + faces only.
      ocr_warning = {
        "feature" => "text",
        "error_class" => e.class.name.to_s,
        "error_message" => e.message.to_s.byteslice(0, 260),
        "fallback" => "labels_faces_only"
      }
      fallback_response = upload_file("/analyze/image", temp_file.path, { features: "labels,faces" })
      payload, results = unpack_response_payload!(
        response: fallback_response,
        operation: "detect_faces_without_text",
        expected_keys: %w[labels faces]
      )
    end

    # Normalize each OCR row (hash or bare string) to a uniform schema.
    text_rows = Array(results["text"])
    text_rows = text_rows.map do |row|
      if row.is_a?(Hash)
        source_name = row["source"].to_s.presence || "ocr"
        variant_name = row["variant"].to_s.presence
        {
          "text" => row["text"].to_s,
          "confidence" => row["confidence"],
          "bbox" => normalize_bounding_box(row["bbox"]),
          "source" => [source_name, variant_name].compact.join(":"),
          "variant" => variant_name
        }
      else
        { "text" => row.to_s, "confidence" => nil, "bbox" => {}, "source" => "ocr", "variant" => nil }
      end
    end
    # Strip, drop empties, and cap OCR blocks at 80 entries.
    ocr_blocks = text_rows
      .map do |row|
        {
          "text" => row["text"].to_s.strip,
          "confidence" => row["confidence"].to_f,
          "bbox" => row["bbox"].is_a?(Hash) ? row["bbox"] : {},
          "source" => row["source"].to_s.presence || "ocr",
          "variant" => row["variant"].to_s.presence
        }
      end
      .reject { |row| row["text"].blank? }
      .first(80)
    # Flattened, deduped OCR text (nil when nothing was read).
    ocr_text = ocr_blocks.map { |row| row["text"] }.uniq.join("\n").presence

    # Normalize label rows (hash or bare string) into object detections.
    object_detections = Array(results["labels"])
      .map do |row|
        if row.is_a?(Hash)
          {
            "label" => (row["label"] || row["description"]).to_s,
            "confidence" => (row["confidence"] || row["score"]).to_f,
            "bbox" => normalize_bounding_box(row["bbox"])
          }
        else
          { "label" => row.to_s, "confidence" => nil, "bbox" => {} }
        end
      end
      .reject { |row| row["label"].blank? }
      .first(80)

    # Distinct cleaned label names for quick topical matching.
    labels = object_detections
      .map { |row| row["label"] }
      .map(&:to_s)
      .map(&:strip)
      .reject(&:blank?)
      .uniq
      .first(40)

    faces = Array(results["faces"]).map { |face| normalize_face(face) }
    # Mine @mentions and #hashtags out of the OCR text.
    mentions = ocr_text.to_s.scan(/@[a-zA-Z0-9._]+/).map(&:downcase).uniq.first(40)
    hashtags = ocr_text.to_s.scan(/#[a-zA-Z0-9_]+/).map(&:downcase).uniq.first(40)
    # Heuristic handle extraction: username-like tokens containing "_" or "."
    # (excluding instagram.com fragments).
    profile_handles = ocr_blocks
      .flat_map { |row| row["text"].to_s.scan(/\b([a-zA-Z0-9._]{3,30})\b/) }
      .map { |match| match.is_a?(Array) ? match.first.to_s.downcase : match.to_s.downcase }
      .select { |token| token.include?("_") || token.include?(".") }
      .reject { |token| token.include?("instagram.com") }
      .uniq
      .first(40)

    {
      "faces" => faces,
      "ocr_text" => ocr_text,
      "ocr_blocks" => ocr_blocks,
      "location_tags" => [],
      "content_labels" => labels,
      "object_detections" => object_detections,
      "mentions" => mentions,
      "hashtags" => hashtags,
      "profile_handles" => profile_handles,
      "metadata" => {
        "source" => "local_microservice",
        "usage_context" => usage_context.to_h,
        # Service-side warnings plus the OCR-fallback warning, if any.
        "warnings" => (
          Array(payload.dig("metadata", "warnings")) +
          Array(ocr_warning)
        ).first(20)
      }
    }
  ensure
    temp_file.close
    temp_file.unlink
  end
end
-
-
# Returns normalized story intelligence from /analyze/video.
-
# - scenes: [{ timestamp:, type:, correlation: }]
-
# - content_labels: [..]
-
# - object_detections: [{ label:, confidence:, timestamps: [] }]
-
# - ocr_text / ocr_blocks
-
# - faces: [{ first_seen:, last_seen:, detection_count: }]
-
# - mentions / hashtags
-
# Uploads a video and extracts story intelligence: scene changes, labels with
# timestamps, timestamped OCR blocks, face presence spans, and handles mined
# from OCR. sample_rate (seconds between sampled frames) is clamped to 1..5.
def analyze_video_story_intelligence!(video_bytes:, sample_rate: 2, usage_context: nil)
  bytes_data = video_bytes.to_s.b
  validate_video_bytes!(bytes_data)

  temp_file = Tempfile.new(["story_video_intel", ".mp4"])
  begin
    temp_file.binmode
    temp_file.write(bytes_data)
    temp_file.flush

    response = upload_file("/analyze/video", temp_file.path, {
      features: "labels,faces,scenes,text",
      sample_rate: sample_rate.to_i.clamp(1, 5)
    })
    payload, results = unpack_response_payload!(
      response: response,
      operation: "analyze_video_story_intelligence",
      expected_keys: %w[labels faces scenes text]
    )

    # Scene changes: keep timestamp/type/correlation, default type label.
    scenes = Array(results["scenes"]).map do |row|
      next unless row.is_a?(Hash)
      {
        "timestamp" => row["timestamp"],
        "type" => row["type"].to_s.presence || "scene_change",
        "correlation" => row["correlation"]
      }.compact
    end.compact.first(80)

    # Labels: one row per detected label with its sighting timestamps.
    object_detections = Array(results["labels"]).map do |row|
      next unless row.is_a?(Hash)
      label = (row["label"] || row["description"]).to_s.strip
      next if label.blank?

      {
        "label" => label,
        "confidence" => (row["max_confidence"] || row["confidence"]).to_f,
        "timestamps" => Array(row["timestamps"]).map(&:to_f).first(80)
      }
    end.compact.first(80)
    content_labels = object_detections.map { |row| row["label"].to_s.downcase }.uniq.first(50)

    # OCR: timestamped text blocks; blank rows dropped, capped at 120.
    ocr_blocks = Array(results["text"]).map do |row|
      next unless row.is_a?(Hash)
      text = row["text"].to_s.strip
      next if text.blank?

      {
        "text" => text,
        "confidence" => row["confidence"].to_f,
        "timestamp" => row["timestamp"],
        "bbox" => normalize_bounding_box(row["bbox"]),
        "source" => row["source"].to_s.presence || "ocr_video"
      }.compact
    end.compact.first(120)
    # Flattened, deduped OCR text (nil when nothing was read).
    ocr_text = ocr_blocks.map { |row| row["text"] }.uniq.join("\n").presence

    # Faces: per-track presence spans and detection counts.
    faces = Array(results["faces"]).map do |row|
      next unless row.is_a?(Hash)
      {
        "first_seen" => row["first_seen"],
        "last_seen" => row["last_seen"],
        "detection_count" => row["detection_count"].to_i
      }.compact
    end.compact.first(60)

    # Mine @mentions and #hashtags out of the OCR text.
    mentions = ocr_text.to_s.scan(/@[a-zA-Z0-9._]+/).map(&:downcase).uniq.first(40)
    hashtags = ocr_text.to_s.scan(/#[a-zA-Z0-9_]+/).map(&:downcase).uniq.first(40)

    {
      "scenes" => scenes,
      "content_labels" => content_labels,
      "object_detections" => object_detections,
      "ocr_text" => ocr_text,
      "ocr_blocks" => ocr_blocks,
      "faces" => faces,
      "mentions" => mentions,
      "hashtags" => hashtags,
      "metadata" => {
        "source" => "local_microservice_video",
        "usage_context" => usage_context.to_h,
        "warnings" => Array(payload.dig("metadata", "warnings")).first(20)
      }
    }
  ensure
    temp_file.close
    temp_file.unlink
  end
end
-
-
private
-
-
# Translates Google Vision feature identifiers (strings, symbols, or
# {type:}/{"type"} hashes) into the local service's feature names.
# Unknown features are dropped; the result is deduped.
def convert_features(google_features)
  mapping = {
    "LABEL_DETECTION" => "labels",
    "TEXT_DETECTION" => "text",
    "FACE_DETECTION" => "faces"
  }

  google_features.filter_map do |feature|
    key = feature.is_a?(Hash) ? feature[:type] || feature["type"] : feature.to_s
    mapping[key]
  end.uniq
end
-
-
# Translates Google Video Intelligence feature identifiers into the local
# service's feature names. Unknown features are dropped; result is deduped.
#
# Fix: made consistent with convert_features — {type:}/{"type"} hash entries
# are now honored instead of being silently stringified and dropped. Plain
# string/symbol inputs behave exactly as before.
def convert_video_features(google_features)
  mapping = {
    "LABEL_DETECTION" => "labels",
    "SHOT_CHANGE_DETECTION" => "scenes",
    "FACE_DETECTION" => "faces",
    "EXPLICIT_CONTENT_DETECTION" => "labels" # no dedicated endpoint; labels cover it
  }

  google_features.filter_map do |feature|
    key = feature.is_a?(Hash) ? (feature[:type] || feature["type"]).to_s : feature.to_s
    mapping[key]
  end.uniq
end
-
-
# Reshapes a local-service /analyze/image payload into the subset of the
# Google Vision REST response format (labelAnnotations, textAnnotations,
# faceAnnotations) the rest of the app consumes. A section key is only
# emitted when the corresponding results key is present.
def convert_vision_response(response)
  _payload, results = unpack_response_payload!(
    response: response,
    operation: "analyze_image",
    expected_keys: %w[labels text faces]
  )

  converted = {}

  if results.key?("labels")
    converted["labelAnnotations"] = Array(results["labels"]).map do |label|
      row = label.is_a?(Hash) ? label : nil
      description = (row ? (row["label"] || row["description"]) : label).to_s
      score = row && (row["confidence"] || row["score"])
      { "description" => description, "score" => score, "topicality" => score }
    end
  end

  if results.key?("text")
    converted["textAnnotations"] = Array(results["text"]).map do |text|
      entry = text.is_a?(Hash) ? text : { "text" => text.to_s, "confidence" => nil, "bbox" => nil }
      {
        "description" => entry["text"].to_s,
        "confidence" => entry["confidence"],
        "boundingPoly" => { "vertices" => convert_bbox_to_vertices(entry["bbox"]) }
      }
    end
  end

  if results.key?("faces")
    converted["faceAnnotations"] = Array(results["faces"]).map do |face|
      entry = face.is_a?(Hash) ? face : {}
      {
        "boundingPoly" => { "vertices" => convert_bbox_to_vertices(entry["bbox"] || entry["bounding_box"]) },
        "confidence" => entry["confidence"],
        "landmarks" => convert_landmarks(entry["landmarks"])
      }
    end
  end

  converted
end
-
-
# Reshapes a local-service /analyze/video payload into the subset of the
# Google Video Intelligence annotation format the app consumes
# (segmentLabelAnnotations and shotAnnotations inside annotationResults[0]).
def convert_video_response(response)
  _payload, results = unpack_response_payload!(
    response: response,
    operation: "analyze_video",
    expected_keys: %w[labels scenes faces]
  )

  converted = { "annotationResults" => [{}] }
  annotation = converted["annotationResults"][0]

  if results.key?("labels")
    annotation["segmentLabelAnnotations"] = Array(results["labels"]).map do |label|
      row = label.is_a?(Hash) ? label : { "label" => label.to_s, "max_confidence" => 0.0, "timestamps" => [] }
      {
        "entity" => {
          "description" => (row["label"] || row["description"]).to_s,
          "confidence" => (row["max_confidence"] || row["confidence"]).to_f
        },
        "segments" => Array(row["timestamps"]).map do |timestamp|
          { "segment" => { "startTimeOffset" => "#{timestamp.to_i}s" } }
        end
      }
    end
  end

  if results.key?("scenes")
    annotation["shotAnnotations"] = Array(results["scenes"]).map do |scene|
      row = scene.is_a?(Hash) ? scene : {}
      { "startTimeOffset" => "#{row["timestamp"].to_i}s" }
    end
  end

  converted
end
-
-
# Converts a bounding box in any supported shape into Google Vision
# boundingPoly vertices ([{ "x" =>, "y" => }, ...]).
# Supported inputs: 4 corner points [[x,y],...], a flat [x1,y1,x2,y2] array,
# or a hash with x1/y1/x2/y2 (or left/top/right/bottom) keys.
# Anything else (including nil) yields [].
def convert_bbox_to_vertices(bbox)
  return [] unless bbox

  case bbox
  when Hash
    left   = (bbox["x1"] || bbox[:x1] || bbox["left"] || bbox[:left]).to_f
    top    = (bbox["y1"] || bbox[:y1] || bbox["top"] || bbox[:top]).to_f
    right  = (bbox["x2"] || bbox[:x2] || bbox["right"] || bbox[:right]).to_f
    bottom = (bbox["y2"] || bbox[:y2] || bbox["bottom"] || bbox[:bottom]).to_f
    [
      { "x" => left.to_i, "y" => top.to_i },
      { "x" => right.to_i, "y" => top.to_i },
      { "x" => right.to_i, "y" => bottom.to_i },
      { "x" => left.to_i, "y" => bottom.to_i }
    ]
  when Array
    if bbox.length == 4 && bbox.first.is_a?(Array)
      bbox.map { |point| { "x" => point[0].to_i, "y" => point[1].to_i } }
    elsif bbox.length == 4
      [
        { "x" => bbox[0].to_i, "y" => bbox[1].to_i },
        { "x" => bbox[2].to_i, "y" => bbox[1].to_i },
        { "x" => bbox[2].to_i, "y" => bbox[3].to_i },
        { "x" => bbox[0].to_i, "y" => bbox[3].to_i }
      ]
    else
      []
    end
  else
    []
  end
end
-
-
# Normalizes one face record (string or symbol keys) into the canonical
# { confidence, bounding_box, landmarks, likelihoods } shape.
def normalize_face(face)
  data = face.is_a?(Hash) ? face : {}

  {
    "confidence" => (data["confidence"] || data[:confidence]).to_f,
    "bounding_box" => normalize_bounding_box(data["bounding_box"] || data["bbox"] || data[:bounding_box] || data[:bbox]),
    "landmarks" => normalize_landmarks(data["landmarks"] || data[:landmarks]),
    "likelihoods" => data["likelihoods"] || data[:likelihoods] || {}
  }
end
-
-
# Normalizes a bounding box to an x1/y1/x2/y2 hash.
# Flat numeric [x1,y1,x2,y2] arrays map directly; corner-point arrays are
# collapsed to their min/max extents; hashes pass through; otherwise {}.
def normalize_bounding_box(value)
  return value if value.is_a?(Hash)

  if value.is_a?(Array) && value.length == 4
    if value.first.is_a?(Numeric)
      { "x1" => value[0], "y1" => value[1], "x2" => value[2], "y2" => value[3] }
    elsif value.first.is_a?(Array)
      xs = value.map { |point| point[0].to_f }
      ys = value.map { |point| point[1].to_f }
      { "x1" => xs.min, "y1" => ys.min, "x2" => xs.max, "y2" => ys.max }
    else
      {}
    end
  else
    {}
  end
end
-
-
# Normalizes up to 24 face landmarks into { type, x, y, z } hashes.
# Hash entries may carry coordinates at the top level or nested under
# "position"; array entries are treated as [x, y, z]; other shapes dropped.
def normalize_landmarks(value)
  Array(value).first(24).filter_map do |landmark|
    case landmark
    when Hash
      {
        "type" => landmark["type"].to_s.presence || "UNKNOWN",
        "x" => landmark["x"] || landmark.dig("position", "x"),
        "y" => landmark["y"] || landmark.dig("position", "y"),
        "z" => landmark["z"] || landmark.dig("position", "z")
      }
    when Array
      { "type" => "UNKNOWN", "x" => landmark[0], "y" => landmark[1], "z" => landmark[2] }
    end
  end
end
-
-
# Converts normalized landmarks into the Google Vision landmark shape
# ({ "type" =>, "position" => { "x"/"y"/"z" } }). Hash entries may carry
# coordinates at the top level or nested under "position"; array entries are
# treated as [x, y, z?]. Returns [] for nil/false input.
#
# Fix: replaced the `landmark[2].to_i rescue 0` modifier — a blanket inline
# rescue that hides real errors — with an explicit nil default, which is the
# only case it actually handled (short arrays index to nil).
def convert_landmarks(landmarks)
  return [] unless landmarks

  landmarks.map do |landmark|
    if landmark.is_a?(Hash)
      x = landmark["x"] || landmark[:x] || landmark.dig("position", "x")
      y = landmark["y"] || landmark[:y] || landmark.dig("position", "y")
      z = landmark["z"] || landmark[:z] || landmark.dig("position", "z")
      {
        "type" => (landmark["type"] || landmark[:type] || "UNKNOWN_LANDMARK").to_s,
        "position" => {
          "x" => x.to_f.to_i,
          "y" => y.to_f.to_i,
          "z" => z.to_f.to_i
        }
      }
    else
      {
        "type" => "UNKNOWN_LANDMARK",
        "position" => {
          "x" => landmark[0].to_i,
          "y" => landmark[1].to_i,
          "z" => (landmark[2] || 0).to_i
        }
      }
    end
  end
end
-
-
# GETs +endpoint+ from the configured service and returns the parsed JSON
# body. On non-success responses raises with the service's error detail; on
# unparseable bodies raises with a truncated raw-body excerpt.
#
# Fix: enable TLS when the configured base URL is https — previously the
# request went out in plaintext and failed against https service URLs
# (now consistent with analyze_image_uri!).
def get_json(endpoint)
  uri = URI.parse("#{@base_url}#{endpoint}")
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = uri.scheme == "https"
  http.open_timeout = HTTP_OPEN_TIMEOUT_SECONDS
  http.read_timeout = HTTP_READ_TIMEOUT_SECONDS

  request = Net::HTTP::Get.new(uri.request_uri)
  request["Accept"] = "application/json"

  response = http.request(request)
  body = JSON.parse(response.body.to_s.presence || "{}")

  return body if response.is_a?(Net::HTTPSuccess)

  error = extract_http_error_message(body: body, raw_body: response.body)
  raise "Local AI service error: HTTP #{response.code} #{response.message} - #{error}"
rescue JSON::ParserError
  raise "Local AI service error: HTTP #{response.code} #{response.message} - #{response.body.to_s.byteslice(0, 500)}"
end
-
-
# POSTs a local file to the microservice as multipart/form-data, with any
# extra form fields from +params+, and returns the parsed JSON body.
#
# Raises a RuntimeError on non-2xx responses or unparseable bodies, mirroring
# get_json's error format.
def upload_file(endpoint, file_path, params = {})
  uri = URI.parse("#{@base_url}#{endpoint}")

  # Hand-rolled multipart body with a random boundary.
  boundary = "----WebKitFormBoundary#{SecureRandom.hex(16)}"

  post_body = []

  # Add file part.
  # Fix: read in binary mode (uploads are media bytes, not text) and actually
  # interpolate the filename — the previous literal was a broken "#(unknown)".
  file_content = File.binread(file_path)
  filename = File.basename(file_path)
  post_body << "--#{boundary}\r\n"
  post_body << "Content-Disposition: form-data; name=\"file\"; filename=\"#{filename}\"\r\n"
  post_body << "Content-Type: application/octet-stream\r\n\r\n"
  post_body << file_content
  post_body << "\r\n"

  # Add plain form fields.
  params.each do |key, value|
    post_body << "--#{boundary}\r\n"
    post_body << "Content-Disposition: form-data; name=\"#{key}\"\r\n\r\n"
    post_body << value.to_s
    post_body << "\r\n"
  end

  post_body << "--#{boundary}--\r\n"

  http = Net::HTTP.new(uri.host, uri.port)
  # Fix: honor https base URLs.
  http.use_ssl = uri.scheme == "https"
  http.open_timeout = HTTP_OPEN_TIMEOUT_SECONDS
  http.read_timeout = HTTP_READ_TIMEOUT_SECONDS

  request = Net::HTTP::Post.new(uri.request_uri)
  request["Content-Type"] = "multipart/form-data; boundary=#{boundary}"
  request["Accept"] = "application/json"
  request.body = post_body.join

  response = http.request(request)
  body = JSON.parse(response.body.to_s.presence || "{}")

  return body if response.is_a?(Net::HTTPSuccess)

  error = extract_http_error_message(body: body, raw_body: response.body)
  raise "Local AI service error: HTTP #{response.code} #{response.message} - #{error}"
rescue JSON::ParserError
  raise "Local AI service error: HTTP #{response.code} #{response.message} - #{response.body.to_s.byteslice(0, 500)}"
end
-
-
# Normalizes a microservice response into [payload, results].
#
# payload is the deep-stringified response hash ({} for non-hash input);
# results is payload["results"] when that is a hash, otherwise the payload
# itself. Raises when the service explicitly reported failure
# (success: false) AND none of the expected result keys are present —
# a declared failure that still carries usable results is tolerated.
def unpack_response_payload!(response:, operation:, expected_keys:)
  payload = response.is_a?(Hash) ? deep_stringify_hash(response) : {}
  results = payload["results"].is_a?(Hash) ? payload["results"] : payload
  explicit_failure = payload.key?("success") && !ActiveModel::Type::Boolean.new.cast(payload["success"])
  has_expected_keys = Array(expected_keys).map(&:to_s).any? { |key| results.key?(key) }

  # Fix: removed the follow-up `results.empty?` block — its nested raise
  # required the exact same explicit_failure && !has_expected_keys condition
  # that already raises here, so it was unreachable dead code.
  if explicit_failure && !has_expected_keys
    raise "Local AI #{operation} failed: #{response_error_message(payload)}"
  end

  [ payload, results ]
end
-
-
# Extracts a human-readable error message from a service payload, checking
# (in order) error.message, error, message, then detail. Falls back to
# "unknown error" for non-hash payloads or when every candidate is blank.
def response_error_message(payload)
  return "unknown error" unless payload.is_a?(Hash)

  error_value = payload["error"]

  message = error_value.is_a?(Hash) ? error_value["message"].to_s.presence : nil
  message ||= error_value.to_s.presence
  message ||= payload["message"].to_s.presence
  message ||= payload["detail"].to_s.presence
  message || "unknown error"
end
-
-
# Guards an image upload: raises ArgumentError with a machine-readable code
# when the bytes are missing, smaller than MIN_IMAGE_UPLOAD_BYTES, or larger
# than MAX_IMAGE_UPLOAD_BYTES. Returns nil when the bytes are acceptable.
def validate_image_bytes!(bytes)
  raise ArgumentError, "image_bytes_missing" if bytes.blank?

  size = bytes.bytesize
  raise ArgumentError, "image_bytes_too_small" if size < MIN_IMAGE_UPLOAD_BYTES
  raise ArgumentError, "image_bytes_too_large" if size > MAX_IMAGE_UPLOAD_BYTES
end
-
-
# Guards a video upload: raises ArgumentError with a machine-readable code
# when the bytes are missing, smaller than MIN_VIDEO_UPLOAD_BYTES, or larger
# than MAX_VIDEO_UPLOAD_BYTES. Returns nil when the bytes are acceptable.
def validate_video_bytes!(bytes)
  raise ArgumentError, "video_bytes_missing" if bytes.blank?

  size = bytes.bytesize
  raise ArgumentError, "video_bytes_too_small" if size < MIN_VIDEO_UPLOAD_BYTES
  raise ArgumentError, "video_bytes_too_large" if size > MAX_VIDEO_UPLOAD_BYTES
end
-
-
# Builds the best available error string for a failed HTTP call, preferring
# structured fields (error.message, error, message, detail / detail.message)
# and finally falling back to the first 500 bytes of the raw response body.
def extract_http_error_message(body:, raw_body:)
  payload = body.is_a?(Hash) ? body : {}
  error_value = payload["error"]
  detail_value = payload["detail"]

  candidates = []
  candidates << error_value["message"].to_s.presence if error_value.is_a?(Hash)
  candidates << error_value.to_s.presence
  candidates << payload["message"].to_s.presence
  candidates <<
    if detail_value.is_a?(Hash)
      detail_value["message"].to_s.presence
    else
      detail_value.to_s.presence
    end

  candidates.compact.first || raw_body.to_s.byteslice(0, 500)
end
-
-
# Recursively converts every hash key in a nested structure to a String,
# descending through arrays as well. Scalars are returned untouched.
def deep_stringify_hash(value)
  if value.is_a?(Hash)
    value.map { |key, child| [ key.to_s, deep_stringify_hash(child) ] }.to_h
  elsif value.is_a?(Array)
    value.map { |child| deep_stringify_hash(child) }
  else
    value
  end
end
-
end
-
end
-
require "net/http"
-
require "json"
-
-
module Ai
  # Thin HTTP client for a local Ollama server. Only the non-streaming
  # endpoints are wrapped (stream: false is always sent).
  class OllamaClient
    BASE_URL = ENV.fetch("OLLAMA_URL", "http://localhost:11434").freeze
    DEFAULT_MODEL = ENV.fetch("OLLAMA_MODEL", "mistral:7b").freeze
    # Timeouts are clamped to sane bounds so a bad env var cannot hang or
    # instantly fail every request.
    OPEN_TIMEOUT_SECONDS = ENV.fetch("OLLAMA_OPEN_TIMEOUT_SECONDS", "12").to_i.clamp(5, 60)
    READ_TIMEOUT_SECONDS = ENV.fetch("OLLAMA_READ_TIMEOUT_SECONDS", "240").to_i.clamp(30, 600)

    def initialize(base_url: nil, default_model: nil)
      @base_url = base_url || BASE_URL
      @default_model = default_model || DEFAULT_MODEL
    end

    # Probes the server via /api/tags. Never raises: failures are reported as
    # { ok: false, message: ... } so callers can surface a status cheaply.
    def test_connection!
      response = get_json("/api/tags")
      models = response["models"] || []

      {
        ok: true,
        message: "Ollama is available",
        models: models.map { |m| m["name"] },
        default_model: @default_model
      }
    rescue StandardError => e
      { ok: false, message: e.message.to_s }
    end

    # Single-prompt completion via /api/generate. Returns a trimmed hash of
    # the fields callers use (response text plus token/duration counters).
    def generate(model:, prompt:, temperature: 0.8, max_tokens: 900)
      payload = {
        model: model || @default_model,
        prompt: prompt,
        options: {
          temperature: temperature,
          num_predict: max_tokens
        },
        keep_alive: ENV.fetch("OLLAMA_KEEP_ALIVE", "10m"),
        stream: false
      }

      response = post_json("/api/generate", payload)

      {
        "model" => response["model"],
        "response" => response["response"],
        "done" => response["done"],
        "prompt_eval_count" => response["prompt_eval_count"],
        "eval_count" => response["eval_count"],
        "total_duration" => response["total_duration"],
        "load_duration" => response["load_duration"]
      }
    end

    # Multi-turn chat via /api/chat. +messages+ is the Ollama chat format
    # (array of role/content hashes).
    def chat(model:, messages:, temperature: 0.8, max_tokens: 900)
      payload = {
        model: model || @default_model,
        messages: messages,
        options: {
          temperature: temperature,
          num_predict: max_tokens
        },
        keep_alive: ENV.fetch("OLLAMA_KEEP_ALIVE", "10m"),
        stream: false
      }

      response = post_json("/api/chat", payload)

      {
        "model" => response["model"],
        "message" => response["message"],
        "done" => response["done"],
        "prompt_eval_count" => response["prompt_eval_count"],
        "eval_count" => response["eval_count"]
      }
    end

    # Lists installed models (raw hashes from /api/tags).
    def list_models
      response = get_json("/api/tags")
      response["models"] || []
    end

    # This would need to be a streaming implementation for real use
    # For now, just trigger the pull
    def pull_model(model_name)
      payload = { name: model_name }
      post_json("/api/pull", payload)
    end

    private

    def get_json(endpoint)
      # GETs are capped at 60s — tag/model listings should be fast even when
      # generation is allowed a much longer read timeout.
      perform_request(endpoint: endpoint, read_timeout: [ READ_TIMEOUT_SECONDS, 60 ].min) do |uri|
        request = Net::HTTP::Get.new(uri.request_uri)
        request["Accept"] = "application/json"
        request
      end
    end

    def post_json(endpoint, payload)
      perform_request(endpoint: endpoint, read_timeout: READ_TIMEOUT_SECONDS) do |uri|
        request = Net::HTTP::Post.new(uri.request_uri)
        request["Content-Type"] = "application/json"
        request["Accept"] = "application/json"
        request.body = JSON.generate(payload)
        request
      end
    end

    # Shared transport: builds the connection, executes the request yielded by
    # the block, parses JSON, and raises a uniform "Ollama error" message for
    # non-2xx or unparseable responses. (Previously duplicated verbatim in
    # get_json and post_json.)
    def perform_request(endpoint:, read_timeout:)
      uri = URI.parse("#{@base_url}#{endpoint}")
      http = Net::HTTP.new(uri.host, uri.port)
      # Fix: honor https base URLs — Net::HTTP defaults to plaintext.
      http.use_ssl = uri.scheme == "https"
      http.open_timeout = OPEN_TIMEOUT_SECONDS
      http.read_timeout = read_timeout

      response = http.request(yield(uri))
      body = JSON.parse(response.body.to_s.presence || "{}")

      return body if response.is_a?(Net::HTTPSuccess)

      error = body["error"].presence || response.body.to_s.byteslice(0, 500)
      raise "Ollama error: HTTP #{response.code} #{response.message} - #{error}"
    rescue JSON::ParserError
      raise "Ollama error: HTTP #{response.code} #{response.message} - #{response.body.to_s.byteslice(0, 500)}"
    end
  end
end
-
require "base64"
-
require "digest"
-
require "uri"
-
-
module Ai
  # Assembles the inputs the AI pipeline needs for one Instagram post:
  # a JSON-safe text payload, the post's media bytes (inline, by URL, or
  # declined with a reason), a stable media fingerprint, plus smaller
  # image/video payloads for detection and frame analysis.
  #
  # All size thresholds are env-tunable (bytes unless noted).
  class PostAnalysisContextBuilder
    MAX_INLINE_IMAGE_BYTES = ENV.fetch("AI_MAX_INLINE_IMAGE_BYTES", 2 * 1024 * 1024).to_i
    MAX_INLINE_VIDEO_BYTES = ENV.fetch("AI_MAX_INLINE_VIDEO_BYTES", 12 * 1024 * 1024).to_i
    MAX_DIRECT_IMAGE_ANALYSIS_BYTES = ENV.fetch("AI_MAX_DIRECT_IMAGE_ANALYSIS_BYTES", 10 * 1024 * 1024).to_i
    MAX_DIRECT_VIDEO_ANALYSIS_BYTES = ENV.fetch("AI_MAX_DIRECT_VIDEO_ANALYSIS_BYTES", 40 * 1024 * 1024).to_i
    # Hard ceiling: media above this size is never processed at all.
    MAX_ABSOLUTE_MEDIA_BYTES = ENV.fetch("AI_MAX_ABSOLUTE_MEDIA_BYTES", 120 * 1024 * 1024).to_i
    MIN_MEDIA_BYTES = ENV.fetch("AI_MIN_MEDIA_BYTES", 512).to_i
    # Max edge length (pixels) when downscaling oversized images.
    IMAGE_RESIZE_MAX_DIMENSION = ENV.fetch("AI_IMAGE_RESIZE_MAX_DIMENSION", 1920).to_i
    MAX_VIDEO_FRAME_ANALYSIS_BYTES = ENV.fetch("AI_VIDEO_FRAME_MAX_BYTES", 35 * 1024 * 1024).to_i

    # profile: the post's author profile; post: the Instagram post record
    # (both project models — assumed ActiveRecord with ActiveStorage `media`
    # and `preview_image` attachments; TODO confirm).
    def initialize(profile:, post:)
      @profile = profile
      @post = post
    end

    attr_reader :profile, :post

    # Text-only context hash: post details (with up to 25 most recent
    # comments), author profile summary, and fixed analysis rules.
    def payload
      {
        post: {
          shortcode: post.shortcode,
          caption: post.caption,
          taken_at: post.taken_at&.iso8601,
          permalink: post.permalink_url,
          likes_count: post.likes_count,
          comments_count: post.comments_count,
          comments: post.instagram_profile_post_comments.recent_first.limit(25).map do |comment|
            {
              author_username: comment.author_username,
              body: comment.body,
              commented_at: comment.commented_at&.iso8601
            }
          end
        },
        author_profile: {
          username: profile.username,
          display_name: profile.display_name,
          bio: profile.bio,
          can_message: profile.can_message,
          tags: profile.profile_tags.pluck(:name).sort
        },
        rules: {
          require_manual_review: true,
          style: "gen_z_light"
        }
      }
    end

    # Decides how to ship the post's media to the analysis service.
    #
    # Returns one of three hash shapes:
    # - none_media_payload (type: "none" + reason) when media is absent,
    #   unsupported, out of size bounds, or fails signature checks;
    # - url_media_payload when the media is too big to inline but a source
    #   URL is available;
    # - an inline payload with raw bytes (and, for small images, a base64
    #   data URL under :image_data_url).
    #
    # Oversized images are downscaled via resize_image_blob before inlining.
    # Any exception is swallowed into a "media_payload_error" none-payload.
    def media_payload
      return none_media_payload(reason: "media_missing") unless post.media.attached?

      blob = post.media.blob
      return none_media_payload(reason: "media_blob_missing") unless blob

      content_type = blob.content_type.to_s
      byte_size = blob.byte_size.to_i
      is_image = content_type.start_with?("image/")
      is_video = content_type.start_with?("video/")
      return none_media_payload(reason: "unsupported_content_type", content_type: content_type) unless is_image || is_video
      return none_media_payload(reason: "zero_byte_blob", content_type: content_type, byte_size: byte_size) if byte_size <= 0
      return none_media_payload(reason: "media_too_large", content_type: content_type, byte_size: byte_size, max_bytes: MAX_ABSOLUTE_MEDIA_BYTES) if byte_size > MAX_ABSOLUTE_MEDIA_BYTES

      media_type = is_video ? "video" : "image"
      media_url = normalize_url(post.source_media_url)
      # Prefer a URL reference over inlining once the inline budget is exceeded.
      if is_image && byte_size > MAX_INLINE_IMAGE_BYTES && media_url.present?
        return url_media_payload(type: media_type, content_type: content_type, url: media_url, byte_size: byte_size)
      end
      if is_video && byte_size > MAX_INLINE_VIDEO_BYTES && media_url.present?
        return url_media_payload(type: media_type, content_type: content_type, url: media_url, byte_size: byte_size)
      end
      # No URL fallback available: videos beyond the direct-analysis cap are dropped.
      if is_video && byte_size > MAX_DIRECT_VIDEO_ANALYSIS_BYTES
        return none_media_payload(
          reason: "video_too_large_for_direct_analysis",
          content_type: content_type,
          byte_size: byte_size,
          max_bytes: MAX_DIRECT_VIDEO_ANALYSIS_BYTES
        )
      end

      # Oversized images get downscaled; everything else is downloaded as-is.
      data =
        if is_image && byte_size > MAX_DIRECT_IMAGE_ANALYSIS_BYTES
          resize_image_blob(blob: blob)
        else
          blob.download
        end

      # Force binary encoding before size/signature checks.
      data = data.to_s.b
      return none_media_payload(reason: "media_bytes_missing", content_type: content_type, byte_size: byte_size) if data.blank?
      return none_media_payload(reason: "media_bytes_too_small", content_type: content_type, byte_size: data.bytesize, min_bytes: MIN_MEDIA_BYTES) if data.bytesize < MIN_MEDIA_BYTES
      return none_media_payload(reason: "media_signature_invalid", content_type: content_type, byte_size: data.bytesize) unless valid_signature?(content_type: content_type, bytes: data)

      payload = {
        type: media_type,
        content_type: content_type,
        bytes: data,
        source: (is_image && byte_size > MAX_DIRECT_IMAGE_ANALYSIS_BYTES) ? "resized_blob" : "blob",
        byte_size: data.bytesize
      }
      # Small images also get an inline data URL for vision-model prompts.
      if is_image && data.bytesize <= MAX_INLINE_IMAGE_BYTES
        encoded = Base64.strict_encode64(data)
        payload[:image_data_url] = "data:#{content_type};base64,#{encoded}"
      end
      payload
    rescue StandardError => e
      none_media_payload(
        reason: "media_payload_error",
        content_type: blob&.content_type.to_s,
        byte_size: blob&.byte_size.to_i,
        error: "#{e.class}: #{e.message}"
      )
    end

    # Stable identifier for "have we already analyzed this media?".
    # Preference order: stored URL fingerprint, blob checksum, SHA-256 of the
    # normalized source URL, SHA-256 of the raw bytes; nil when nothing is
    # available. Pass a precomputed +media+ payload to avoid re-downloading.
    def media_fingerprint(media: nil)
      fingerprint = post.media_url_fingerprint.to_s
      return fingerprint if fingerprint.present?

      if post.media.attached?
        checksum = post.media.blob&.checksum.to_s
        return "blob:#{checksum}" if checksum.present?
      end

      normalized_url = normalize_url(post.source_media_url)
      return Digest::SHA256.hexdigest(normalized_url) if normalized_url.present?

      payload = media || media_payload
      bytes = payload[:bytes]
      return Digest::SHA256.hexdigest(bytes) if bytes.present?

      nil
    end

    # Picks a single still image suitable for (face/object) detection:
    # the image itself, a stored video preview image, or a preview generated
    # on the fly (<= 960px). Returns { skipped: true, reason: ... } when no
    # usable still can be produced; never raises.
    def detection_image_payload
      return { skipped: true, reason: "media_missing" } unless post.media.attached?

      content_type = post.media.blob&.content_type.to_s
      if content_type.start_with?("image/")
        return {
          skipped: false,
          image_bytes: post.media.download,
          detection_source: "post_media_image",
          content_type: content_type
        }
      end

      if content_type.start_with?("video/")
        # Prefer the previously stored preview frame when one exists.
        if post.preview_image.attached?
          return {
            skipped: false,
            image_bytes: post.preview_image.download,
            detection_source: "post_preview_image",
            content_type: post.preview_image.blob&.content_type.to_s
          }
        end

        # Otherwise try to render a preview frame now (requires a video
        # previewer, e.g. ffmpeg, to be configured — hence the rescue).
        begin
          generated_preview = post.media.preview(resize_to_limit: [ 960, 960 ]).processed
          preview_blob = generated_preview.respond_to?(:image) ? generated_preview.image : nil
          return {
            skipped: false,
            image_bytes: generated_preview.download,
            detection_source: "post_generated_video_preview",
            content_type: preview_blob&.content_type.to_s.presence || "image/jpeg"
          }
        rescue StandardError
          return {
            skipped: true,
            reason: "video_preview_unavailable",
            content_type: content_type
          }
        end
      end

      {
        skipped: true,
        reason: "unsupported_content_type",
        content_type: content_type
      }
    rescue StandardError => e
      # content_type may still be nil here if the failure happened before
      # it was assigned; nil.to_s keeps the payload well-formed.
      {
        skipped: true,
        reason: "media_load_error",
        error: e.message.to_s,
        content_type: content_type.to_s
      }
    end

    # Full video bytes for frame-level analysis, capped at
    # MAX_VIDEO_FRAME_ANALYSIS_BYTES. Returns { skipped: true, reason: ... }
    # for non-video media, oversized files, or download errors; never raises.
    def video_payload
      return { skipped: true, reason: "media_missing" } unless post.media.attached?

      blob = post.media.blob
      content_type = blob&.content_type.to_s
      return { skipped: true, reason: "not_video", content_type: content_type } unless content_type.to_s.start_with?("video/")

      if blob.byte_size.to_i > MAX_VIDEO_FRAME_ANALYSIS_BYTES
        return {
          skipped: true,
          reason: "video_too_large_for_frame_analysis",
          content_type: content_type,
          byte_size: blob.byte_size.to_i,
          max_bytes: MAX_VIDEO_FRAME_ANALYSIS_BYTES
        }
      end

      {
        skipped: false,
        video_bytes: blob.download,
        content_type: content_type,
        reference_id: "post_media_#{post.id}"
      }
    rescue StandardError => e
      {
        skipped: true,
        reason: "video_load_error",
        error: e.message.to_s
      }
    end

    private

    # Payload shape for "fetch it yourself from this URL" delivery.
    def url_media_payload(type:, content_type:, url:, byte_size:)
      {
        type: type.to_s,
        content_type: content_type.to_s,
        url: url.to_s,
        source: "source_media_url",
        byte_size: byte_size.to_i
      }
    end

    # Payload shape for declined media; nil fields are compacted away so the
    # reason hash stays minimal.
    def none_media_payload(reason:, content_type: nil, byte_size: nil, max_bytes: nil, min_bytes: nil, error: nil)
      {
        type: "none",
        reason: reason.to_s,
        content_type: content_type.to_s.presence,
        byte_size: byte_size,
        max_bytes: max_bytes,
        min_bytes: min_bytes,
        error: error.to_s.presence
      }.compact
    end

    # Downscales the attached image to IMAGE_RESIZE_MAX_DIMENSION; if variant
    # processing fails (e.g. no image processor installed), falls back to the
    # original full-size bytes.
    def resize_image_blob(blob:)
      variant = post.media.variant(resize_to_limit: [ IMAGE_RESIZE_MAX_DIMENSION, IMAGE_RESIZE_MAX_DIMENSION ])
      variant.processed.download
    rescue StandardError
      blob.download
    end

    # Cheap magic-number check that the bytes plausibly match the declared
    # content type (JPEG/PNG/GIF/WebP/HEIC + MP4/MOV/WebM containers).
    # Unknown types pass through as valid — this only catches obvious
    # mismatches like HTML error pages saved as images.
    def valid_signature?(content_type:, bytes:)
      type = content_type.to_s.downcase
      return false if bytes.blank?

      if type.include?("jpeg")
        return bytes.start_with?("\xFF\xD8".b)
      end
      if type.include?("png")
        return bytes.start_with?("\x89PNG\r\n\x1A\n".b)
      end
      if type.include?("gif")
        return bytes.start_with?("GIF87a".b) || bytes.start_with?("GIF89a".b)
      end
      if type.include?("webp")
        return bytes.bytesize >= 12 && bytes.byteslice(0, 4) == "RIFF" && bytes.byteslice(8, 4) == "WEBP"
      end
      if type.include?("heic") || type.include?("heif")
        return bytes.bytesize >= 12 && bytes.byteslice(4, 4) == "ftyp"
      end
      if type.start_with?("video/")
        return bytes.bytesize >= 12 && bytes.byteslice(4, 4) == "ftyp" if type.include?("mp4") || type.include?("quicktime")
        return bytes.bytesize >= 4 && bytes.byteslice(0, 4) == "\x1A\x45\xDF\xA3".b if type.include?("webm")
      end

      true
    end

    # Canonicalizes an http(s) URL to scheme://host/path (drops query string
    # and fragment, which Instagram CDN URLs vary per request). Non-http and
    # unparseable values are returned as-is; blank input yields nil.
    def normalize_url(raw)
      value = raw.to_s.strip
      return nil if value.blank?

      uri = URI.parse(value)
      return value unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)

      "#{uri.scheme}://#{uri.host}#{uri.path}"
    rescue StandardError
      value
    end
  end
end
-
require "securerandom"
-
-
module Ai
  # Persists the per-post AI pipeline state machine inside the post's
  # `metadata["ai_pipeline"]` JSON blob. Each analysis run is keyed by a
  # random run_id; steps (visual/face/ocr/video/metadata) advance through
  # pending -> queued/running -> succeeded/failed/skipped. All writes are
  # serialized with post.with_lock so concurrent step jobs cannot clobber
  # each other's updates.
  class PostAnalysisPipelineState
    STEP_KEYS = %w[visual face ocr video metadata].freeze
    # Step statuses after which a step will not run again in this run.
    TERMINAL_STATUSES = %w[succeeded failed skipped].freeze
    PIPELINE_TERMINAL_STATUSES = %w[completed failed].freeze

    # Every task defaults to enabled; callers may switch tasks off per run.
    DEFAULT_TASK_FLAGS = {
      "analyze_visual" => true,
      "analyze_faces" => true,
      "run_ocr" => true,
      "run_video" => true,
      "run_metadata" => true,
      "generate_comments" => true,
      "enforce_comment_evidence_policy" => true,
      "retry_on_incomplete_profile" => true
    }.freeze

    # Maps the analysis task flags to the pipeline step they gate. Flags with
    # no entry here (e.g. generate_comments) do not create a step.
    TASK_TO_STEP = {
      "analyze_visual" => "visual",
      "analyze_faces" => "face",
      "run_ocr" => "ocr",
      "run_video" => "video",
      "run_metadata" => "metadata"
    }.freeze

    def initialize(post:)
      @post = post
    end

    attr_reader :post

    # Begins a new run: resets any previous failure marker, writes a fresh
    # ai_pipeline blob (status "running", steps initialized from the task
    # flags), and marks the post itself as running/un-analyzed.
    # Returns the run_id.
    def start!(task_flags: {}, source_job: nil, run_id: SecureRandom.uuid)
      normalized_flags = normalize_task_flags(task_flags)
      required_steps = required_steps_for(flags: normalized_flags)
      now = iso_timestamp

      post.with_lock do
        metadata = metadata_for(post)
        metadata.delete("ai_pipeline_failure")
        metadata["ai_pipeline"] = {
          "run_id" => run_id,
          "status" => "running",
          "source_job" => source_job.to_s.presence,
          "created_at" => now,
          "updated_at" => now,
          "task_flags" => normalized_flags,
          "required_steps" => required_steps,
          "steps" => build_initial_steps(required_steps: required_steps, at: now)
        }.compact

        post.update!(
          ai_status: "running",
          analyzed_at: nil,
          metadata: metadata
        )
      end

      run_id
    end

    # Returns the stored pipeline hash only if it belongs to +run_id+
    # (guards against reading state from a newer, superseding run).
    def pipeline_for(run_id:)
      pipeline = current_pipeline
      return nil unless pipeline.is_a?(Hash)
      return nil unless pipeline["run_id"].to_s == run_id.to_s

      pipeline
    end

    # Raw pipeline blob for whatever run is currently stored (may be nil).
    def current_pipeline
      metadata_for(post)["ai_pipeline"]
    end

    # Step names this run must complete; [] if the run is gone/superseded.
    def required_steps(run_id:)
      pipeline = pipeline_for(run_id: run_id)
      return [] unless pipeline.is_a?(Hash)

      Array(pipeline["required_steps"]).map(&:to_s)
    end

    # Per-step state hash, or nil when the run/step is unknown.
    def step_state(run_id:, step:)
      pipeline = pipeline_for(run_id: run_id)
      return nil unless pipeline.is_a?(Hash)

      pipeline.dig("steps", step.to_s)
    end

    def step_terminal?(run_id:, step:)
      TERMINAL_STATUSES.include?(step_state(run_id: run_id, step: step).to_h["status"].to_s)
    end

    def pipeline_terminal?(run_id:)
      PIPELINE_TERMINAL_STATUSES.include?(pipeline_for(run_id: run_id).to_h["status"].to_s)
    end

    # Records that a worker picked up the step (also bumps its attempt count
    # — see mark_step!).
    def mark_step_running!(run_id:, step:, queue_name:, active_job_id:)
      mark_step!(
        run_id: run_id,
        step: step,
        status: "running",
        queue_name: queue_name,
        active_job_id: active_job_id,
        started_at: iso_timestamp
      )
    end

    # Records that the step's job has been enqueued but not yet started.
    def mark_step_queued!(run_id:, step:, queue_name:, active_job_id:, result: nil)
      mark_step!(
        run_id: run_id,
        step: step,
        status: "queued",
        queue_name: queue_name,
        active_job_id: active_job_id,
        result: result,
        started_at: nil
      )
    end

    # Finishes a step. Unknown statuses are coerced to "failed" so a typo in
    # a caller can never leave a step in a made-up, non-terminal state.
    def mark_step_completed!(run_id:, step:, status:, result: nil, error: nil)
      normalized_status = status.to_s
      normalized_status = "failed" unless (TERMINAL_STATUSES + [ "queued", "running", "pending" ]).include?(normalized_status)

      mark_step!(
        run_id: run_id,
        step: step,
        status: normalized_status,
        result: result,
        error: error,
        finished_at: iso_timestamp
      )
    end

    # True once every required step has reached a terminal status.
    # Deliberately false when the required list is empty (run unknown).
    def all_required_steps_terminal?(run_id:)
      required = required_steps(run_id: run_id)
      return false if required.empty?

      required.all? do |step|
        TERMINAL_STATUSES.include?(step_state(run_id: run_id, step: step).to_h["status"].to_s)
      end
    end

    # Like all_required_steps_terminal? but ignores the "metadata" step —
    # and, unlike it, returns true for an empty core set.
    def core_steps_terminal?(run_id:)
      required = required_steps(run_id: run_id)
      core = required - [ "metadata" ]
      return true if core.empty?

      core.all? do |step|
        TERMINAL_STATUSES.include?(step_state(run_id: run_id, step: step).to_h["status"].to_s)
      end
    end

    # Moves the whole run to a terminal status ("completed"/"failed"),
    # stamping finished_at and optional details.
    def mark_pipeline_finished!(run_id:, status:, details: nil)
      with_pipeline_update(run_id: run_id) do |pipeline, _metadata|
        pipeline["status"] = status.to_s
        pipeline["updated_at"] = iso_timestamp
        pipeline["finished_at"] = iso_timestamp
        pipeline["details"] = details if details.present?
      end
    end

    # True when +step+ is required for this run and has not been picked up yet
    # (no status, or still "pending").
    def required_step_pending?(run_id:, step:)
      required = required_steps(run_id: run_id)
      return false unless required.include?(step.to_s)

      step_row = step_state(run_id: run_id, step: step).to_h
      step_row["status"].to_s.in?([ "", "pending" ])
    end

    private

    # Low-level step writer: merges the given fields into the step's row
    # under the row lock. nil/absent fields leave the existing values intact.
    def mark_step!(run_id:, step:, status:, queue_name: nil, active_job_id: nil, result: nil, error: nil, started_at: nil, finished_at: nil)
      with_pipeline_update(run_id: run_id) do |pipeline, _metadata|
        key = step.to_s
        steps = pipeline["steps"].is_a?(Hash) ? pipeline["steps"] : {}
        row = steps[key].is_a?(Hash) ? steps[key] : {}

        # Count attempts only when a worker actually starts execution.
        attempts = row["attempts"].to_i
        attempts += 1 if status.to_s == "running"

        row["status"] = status.to_s
        row["queue_name"] = queue_name if queue_name.present?
        row["active_job_id"] = active_job_id if active_job_id.present?
        row["started_at"] = started_at if started_at.present?
        row["finished_at"] = finished_at if finished_at.present?
        row["attempts"] = attempts
        row["result"] = result if result.is_a?(Hash)
        row["error"] = error.to_s if error.present?

        steps[key] = row
        pipeline["steps"] = steps
        pipeline["updated_at"] = iso_timestamp
      end
    end

    # Runs the block against the pipeline hash under post.with_lock, then
    # persists the mutated metadata. Returns nil — without yielding — when
    # the stored run no longer matches +run_id+ (stale job from an older
    # run). Note the `return` here exits the whole method from inside the
    # with_lock block, which releases the lock normally.
    def with_pipeline_update(run_id:)
      post.with_lock do
        metadata = metadata_for(post)
        pipeline = metadata["ai_pipeline"]
        return nil unless pipeline.is_a?(Hash)
        return nil unless pipeline["run_id"].to_s == run_id.to_s

        yield(pipeline, metadata)

        metadata["ai_pipeline"] = pipeline
        post.update!(metadata: metadata)
        pipeline
      end
    end

    # Overlays caller-supplied flags on DEFAULT_TASK_FLAGS (unknown keys are
    # ignored, values cast to booleans). run_video is force-disabled when the
    # post has no video attachment, regardless of the caller's wishes.
    def normalize_task_flags(task_flags)
      incoming = deep_stringify(task_flags.is_a?(Hash) ? task_flags : {})
      normalized = DEFAULT_TASK_FLAGS.deep_dup

      incoming.each do |key, value|
        next unless normalized.key?(key)

        normalized[key] = ActiveModel::Type::Boolean.new.cast(value)
      end

      normalized["run_video"] = false unless video_media_available?
      normalized
    end

    # Translates enabled task flags into the ordered list of step names.
    def required_steps_for(flags:)
      TASK_TO_STEP.each_with_object([]) do |(flag_key, step_key), steps|
        steps << step_key if ActiveModel::Type::Boolean.new.cast(flags[flag_key])
      end
    end

    # True when the attached media is a video (never raises; storage errors
    # simply mean "no video").
    def video_media_available?
      return false unless post.media.attached?

      post.media.blob&.content_type.to_s.start_with?("video/")
    rescue StandardError
      false
    end

    # Initial steps map: required steps start "pending"; all other known
    # steps are recorded immediately as "skipped" with a task_disabled reason
    # so the blob always contains every STEP_KEYS entry.
    def build_initial_steps(required_steps:, at:)
      STEP_KEYS.each_with_object({}) do |step, out|
        if required_steps.include?(step)
          out[step] = {
            "status" => "pending",
            "attempts" => 0,
            "queue_name" => nil,
            "active_job_id" => nil,
            "started_at" => nil,
            "finished_at" => nil,
            "result" => {},
            "error" => nil,
            "created_at" => at
          }
        else
          out[step] = {
            "status" => "skipped",
            "attempts" => 0,
            "result" => { "reason" => "task_disabled" },
            "created_at" => at,
            "finished_at" => at
          }
        end
      end
    end

    # Deep-copied metadata hash for the record ({} when metadata is not a
    # hash), so in-place mutation never aliases the AR attribute.
    def metadata_for(record)
      value = record.metadata
      value.is_a?(Hash) ? value.deep_dup : {}
    end

    # Recursively stringifies hash keys (descending through arrays).
    def deep_stringify(value)
      case value
      when Hash
        value.each_with_object({}) do |(key, child), out|
          out[key.to_s] = deep_stringify(child)
        end
      when Array
        value.map { |child| deep_stringify(child) }
      else
        value
      end
    end

    # Millisecond-precision ISO-8601 timestamp used for every stamp field.
    def iso_timestamp
      Time.current.iso8601(3)
    end
  end
end
-
require "json"
-
-
module Ai
  # Runs a single LLM analysis pass over one Instagram post payload
  # (optionally with an inline image) via the local microservice's
  # chat-completions endpoint, and parses the model's JSON reply.
  class PostAnalyzer
    DEFAULT_MODEL = "mistral:7b".freeze

    # client: anything responding to chat_completions! (defaults to the
    # project's LocalMicroserviceClient); model: overrides DEFAULT_MODEL.
    def initialize(client: nil, model: nil)
      @client = client || Ai::LocalMicroserviceClient.new
      @model = model.presence || DEFAULT_MODEL
    end

    # Analyzes the post. post_payload is the JSON-safe context hash (see
    # PostAnalysisContextBuilder#payload); image_data_url, when present and a
    # data:image/... URL, is attached to the user message for vision models.
    #
    # Returns a hash with :model, :prompt (system/user text + image count),
    # :response_text, :response_raw, and :analysis — the parsed JSON, or a
    # { "parse_error" => true, "raw_text" => ... } marker when the model did
    # not return valid JSON. Transport errors from the client propagate.
    def analyze!(post_payload:, image_data_url: nil)
      # NOTE: both heredocs below are part of the model contract — the
      # downstream code depends on the exact key names requested here.
      system = <<~SYS.strip
        You analyze an Instagram feed post payload and optionally an image.

        Output MUST be strict JSON. No markdown.

        Constraints:
        - Do NOT guess sensitive demographics (age, gender, ethnicity, religion, nationality, native place).
        - If the payload contains explicit self-declared information, you may repeat it as evidence.
        - Decide whether we should store this post (relevant) or ignore it (irrelevant) based on tags/rules in the payload.
        - Provide only safe, non-deceptive interaction suggestions.
        - Style for generated comments: modern Gen Z voice, light slang, playful energy, and occasional emojis.
        - Keep it socially engaging and authentic without being offensive, sexual, manipulative, or overfamiliar.
        - First produce a concise, visual image_description; then base comment suggestions on that description.
      SYS

      user = <<~TXT
        INPUT_POST_JSON:
        #{JSON.pretty_generate(post_payload)}

        Produce JSON with keys:
        - image_description: 1-3 sentence visual description of what is happening in the image
        - relevant: boolean
        - author_type: one of ["personal_user","friend","relative","page","unknown"]
        - topics: array of strings
        - sentiment: one of ["positive","neutral","negative","mixed","unknown"]
        - suggested_actions: array of strings from ["ignore","review","like_suggestion","comment_suggestion"]
        - recommended_next_action: one of ["ignore","review","comment_suggestion","like_suggestion"]
        - engagement_score: number 0-1
        - comment_suggestions: array of 5 short comments (friendly/contextual, Gen Z-style voice, based on image_description, may include emojis)
        - personalization_tokens: array of short contextual tokens we can safely reference
        - confidence: number 0-1
        - evidence: short string
      TXT

      # Only well-formed inline image data URLs are forwarded.
      images = []
      images << image_data_url.to_s if image_data_url.to_s.start_with?("data:image/")

      messages = [
        { role: "system", content: [ { type: "text", text: system } ] },
        { role: "user", content: build_user_content(text: user, images: images) }
      ]

      # Low temperature: this is classification/extraction, not creative text.
      resp = @client.chat_completions!(
        model: @model,
        messages: messages,
        temperature: 0.2,
        usage_category: "report_generation",
        usage_context: { workflow: "post_analyzer" }
      )
      parsed = safe_parse_json(resp[:content])

      {
        model: @model,
        prompt: { system: system, user: user, images_count: images.length },
        response_text: resp[:content],
        response_raw: resp[:raw],
        analysis: parsed
      }
    end

    private

    # Builds the multi-part user message: the text block followed by one
    # image_url part per inline image.
    def build_user_content(text:, images:)
      out = [ { type: "text", text: text } ]
      Array(images).each do |url|
        out << { type: "image_url", image_url: { url: url } }
      end
      out
    end

    # Parses the model reply as JSON; instead of raising on malformed output,
    # returns a marker hash preserving the raw text for debugging.
    def safe_parse_json(text)
      JSON.parse(text.to_s)
    rescue StandardError
      { "parse_error" => true, "raw_text" => text.to_s }
    end
  end
end
-
module Ai
-
class PostCommentGenerationService
-
REQUIRED_SIGNAL_KEYS = %w[history face text_context].freeze
-
MAX_SUGGESTIONS = 8
-
-
def initialize(
-
account:,
-
profile:,
-
post:,
-
preparation_summary: nil,
-
profile_preparation_service: nil,
-
comment_generator: nil,
-
enforce_required_evidence: true
-
)
-
@account = account
-
@profile = profile
-
@post = post
-
@preparation_summary = preparation_summary
-
@profile_preparation_service = profile_preparation_service
-
@comment_generator = comment_generator
-
@enforce_required_evidence = ActiveModel::Type::Boolean.new.cast(enforce_required_evidence)
-
end
-
-
def run!
-
return skipped_result(reason_code: "post_missing") unless post&.persisted?
-
-
analysis = normalized_hash(post.analysis)
-
metadata = normalized_hash(post.metadata)
-
preparation = prepared_history_summary
-
-
face_count = extract_face_count(analysis: analysis, metadata: metadata)
-
ocr_text = extract_ocr_text(analysis: analysis, metadata: metadata)
-
transcript = extract_transcript(analysis: analysis, metadata: metadata)
-
text_context = extract_text_context(analysis: analysis, metadata: metadata)
-
history_ready = ActiveModel::Type::Boolean.new.cast(preparation["ready_for_comment_generation"])
-
-
missing = []
-
missing << "history" unless history_ready
-
missing << "face" unless face_count.positive?
-
missing << "text_context" if text_context.blank?
-
-
if missing.any? && enforce_required_evidence?
-
return persist_blocked!(
-
analysis: analysis,
-
metadata: metadata,
-
preparation: preparation,
-
missing_signals: missing,
-
reason_code: "missing_required_evidence"
-
)
-
end
-
-
topics = merged_topics(analysis: analysis, metadata: metadata)
-
image_description = build_image_description(
-
analysis: analysis,
-
metadata: metadata,
-
topics: topics,
-
transcript: transcript
-
)
-
-
if image_description.blank?
-
return persist_blocked!(
-
analysis: analysis,
-
metadata: metadata,
-
preparation: preparation,
-
missing_signals: [ "visual_context" ],
-
reason_code: "missing_visual_context"
-
)
-
end
-
-
result = comment_generator.generate!(
-
post_payload: post_payload,
-
image_description: image_description,
-
topics: topics,
-
author_type: inferred_author_type,
-
historical_comments: historical_comments,
-
historical_context: historical_context,
-
profile_preparation: preparation,
-
verified_profile_history: verified_profile_history,
-
conversational_voice: conversational_voice,
-
cv_ocr_evidence: build_comment_context_payload(
-
analysis: analysis,
-
metadata: metadata,
-
topics: topics,
-
transcript: transcript,
-
ocr_text: ocr_text
-
)
-
)
-
-
suggestions = normalize_suggestions(result[:comment_suggestions])
-
if suggestions.empty?
-
return persist_blocked!(
-
analysis: analysis,
-
metadata: metadata,
-
preparation: preparation,
-
missing_signals: [ "generation_output" ],
-
reason_code: "comment_generation_empty",
-
error_message: result[:error_message].to_s.presence || "Comment generation produced no valid suggestions."
-
)
-
end
-
-
analysis["comment_suggestions"] = suggestions
-
analysis["comment_generation_status"] = result[:status].to_s.presence || "ok"
-
analysis["comment_generation_source"] = result[:source].to_s.presence || "ollama"
-
analysis["comment_generation_fallback_used"] = ActiveModel::Type::Boolean.new.cast(result[:fallback_used])
-
analysis["comment_generation_error"] = result[:error_message].to_s.presence
-
-
metadata["comment_generation_policy"] = {
-
"status" => missing.any? ? "enabled_with_missing_required_evidence" : "enabled",
-
"required_signals" => REQUIRED_SIGNAL_KEYS,
-
"missing_signals" => missing.any? ? missing : [],
-
"enforce_required_evidence" => enforce_required_evidence?,
-
"history_ready" => history_ready,
-
"history_reason_code" => preparation["reason_code"].to_s.presence,
-
"face_count" => face_count,
-
"text_context_present" => text_context.present?,
-
"ocr_text_present" => ocr_text.present?,
-
"transcript_present" => transcript.present?,
-
"updated_at" => Time.current.iso8601(3)
-
}.compact
-
-
post.update!(analysis: analysis, metadata: metadata)
-
-
{
-
blocked: false,
-
status: analysis["comment_generation_status"],
-
source: analysis["comment_generation_source"],
-
suggestions_count: suggestions.length,
-
reason_code: nil,
-
history_reason_code: preparation["reason_code"].to_s.presence
-
}
-
rescue StandardError => e
-
analysis = normalized_hash(post&.analysis)
-
metadata = normalized_hash(post&.metadata)
-
persist_blocked!(
-
analysis: analysis,
-
metadata: metadata,
-
preparation: prepared_history_summary,
-
missing_signals: [ "generation_error" ],
-
reason_code: "comment_generation_error",
-
error_message: "#{e.class}: #{e.message}"
-
)
-
end
-
-
private
-
-
attr_reader :account, :profile, :post
-
-
# Lazily builds and memoizes the profile-history preparation summary that
# gates comment generation.
#
# Uses `defined?` rather than `||=` so that any cached value (even nil/false)
# is honored. An injected @preparation_summary Hash wins over recomputation;
# otherwise the (optionally injected) preparation service is invoked.
# NOTE: when the computation raises, the fallback summary below is returned
# but NOT memoized — the next call recomputes.
def prepared_history_summary
  return @prepared_history_summary if defined?(@prepared_history_summary)

  @prepared_history_summary =
    if @preparation_summary.is_a?(Hash)
      @preparation_summary
    else
      service =
        @profile_preparation_service ||
        Ai::ProfileCommentPreparationService.new(
          account: account,
          profile: profile,
          # Do not trigger analysis of unanalyzed posts from this path.
          analyze_missing_posts: false
        )
      service.prepare!(force: false)
    end
rescue StandardError => e
  # Degrade to a "not ready" summary instead of propagating the error.
  {
    "ready_for_comment_generation" => false,
    "reason_code" => "profile_preparation_failed",
    "reason" => e.message.to_s,
    "error_class" => e.class.name
  }
end
-
-
# Lazily constructs the local LLM comment generator, wired to the model
# preferred for this profile.
def comment_generator
  return @comment_generator if @comment_generator

  @comment_generator = Ai::LocalEngagementCommentGenerator.new(
    ollama_client: Ai::OllamaClient.new,
    model: preferred_model
  )
end
-
-
# Resolves the Ollama model name from the profile's latest analysis
# provider settings; defaults to "mistral:7b" when unset or on any error.
def preferred_model
  configured = profile&.latest_analysis&.ai_provider_setting&.config_value("ollama_model")
  configured.to_s.presence || "mistral:7b"
rescue StandardError
  "mistral:7b"
end
-
-
# Builds the post-analysis context payload and overlays the strict evidence
# requirements used for grounded comment generation. Returns {} on any error.
def post_payload
  payload = Ai::PostAnalysisContextBuilder.new(profile: profile, post: post).payload
  base_rules = payload[:rules].is_a?(Hash) ? payload[:rules] : {}
  payload[:rules] = base_rules.merge(
    require_history_context: true,
    require_face_signal: true,
    require_ocr_signal: true,
    require_text_context: true
  )
  payload
rescue StandardError
  {}
end
-
-
# Maps the profile's tags onto a coarse author type for prompt context.
# Precedence: relative > friend variants > page > personal_user > unknown.
def inferred_author_type
  tags = profile.profile_tags.pluck(:name).map(&:to_s)

  case
  when tags.include?("relative")
    "relative"
  when tags.include?("friend"), tags.include?("female_friend"), tags.include?("male_friend")
    "friend"
  when tags.include?("page")
    "page"
  when tags.include?("personal_user")
    "personal_user"
  else
    "unknown"
  end
rescue StandardError
  "unknown"
end
-
-
# Returns up to 12 distinct comment texts previously sent to this profile,
# newest first, extracted from "post_comment_sent" event metadata.
# Returns [] on any error.
def historical_comments
  rows = profile.instagram_profile_events.where(kind: "post_comment_sent").order(detected_at: :desc, id: :desc).limit(20).pluck(:metadata)
  out = rows.filter_map do |meta|
    # Metadata may be nil or malformed; only Hash rows contribute.
    row = meta.is_a?(Hash) ? meta : {}
    row["comment_text"].to_s.strip.presence
  end
  out.uniq.first(12)
rescue StandardError
  []
end
-
-
# Short narrative of the profile's history (up to 4 chunks); "" on any error.
def historical_context
  narrative = profile.history_narrative_text(max_chunks: 4)
  narrative.to_s
rescue StandardError
  ""
end
-
-
# Compact summaries of up to 8 recently analyzed posts (excluding the current
# post), used as verified historical context for comment generation.
# Returns [] on any error.
def verified_profile_history
  rows = profile.instagram_profile_posts
    .where(ai_status: "analyzed")
    .where.not(id: post.id)
    .includes(:instagram_post_faces)
    .recent_first
    .limit(8)

  rows.map do |row|
    analysis = normalized_hash(row.analysis)
    {
      shortcode: row.shortcode.to_s,
      taken_at: row.taken_at&.iso8601,
      topics: normalized_topics(analysis["topics"]).first(8),
      objects: normalized_topics(analysis["objects"]).first(8),
      hashtags: normalized_topics(analysis["hashtags"]).first(8),
      mentions: normalized_topics(analysis["mentions"]).first(8),
      face_count: row.instagram_post_faces.size,
      # NOTE(review): byteslice bounds the payload in bytes and may split a
      # multibyte character at the boundary — confirm consumers tolerate this.
      image_description: analysis["image_description"].to_s.byteslice(0, 220)
    }
  end
rescue StandardError
  []
end
-
-
# Signals describing how this profile "sounds": tags, recurring topics and
# hashtags, and frequently co-appearing people. Returns {} on any error.
def conversational_voice
  summary = profile.instagram_profile_behavior_profile&.behavioral_summary
  # Behavioral summary may be absent or malformed; treat as empty.
  summary = {} unless summary.is_a?(Hash)

  {
    profile_tags: profile.profile_tags.pluck(:name).map(&:to_s).uniq.first(10),
    recurring_topics: hash_keys(summary["topic_clusters"]),
    recurring_hashtags: hash_keys(summary["top_hashtags"]),
    frequent_people_labels: frequent_people_labels(summary["frequent_secondary_persons"])
  }
rescue StandardError
  {}
end
-
-
# Up to 10 cleaned (stringified, stripped, non-blank) keys of a Hash;
# [] for any non-Hash input.
def hash_keys(value)
  return [] unless value.is_a?(Hash)

  value.keys
       .map { |key| key.to_s.strip }
       .reject(&:blank?)
       .first(10)
end
-
-
# Extracts up to 8 unique, non-blank "label" values from an array of hashes
# (string or symbol keys accepted).
def frequent_people_labels(value)
  labels = Array(value).filter_map do |row|
    next unless row.is_a?(Hash)

    row["label"].to_s.presence || row[:label].to_s.presence
  end

  labels.map { |label| label.to_s.strip }.reject(&:blank?).uniq.first(8)
end
-
-
# Normalizes a topic list: stringified, stripped, blanks dropped, de-duplicated.
def normalized_topics(value)
  Array(value).map { |topic| topic.to_s.strip }.reject(&:blank?).uniq
end
-
-
# Union of all topic-like signals from the analysis hash and the
# video-processing metadata, normalized and de-duplicated (first-seen order).
def merged_topics(analysis:, metadata:)
  analysis_keys = %w[topics video_topics video_objects video_hashtags]
  metadata_keys = %w[topics objects hashtags]

  combined =
    analysis_keys.flat_map { |key| normalized_topics(analysis[key]) } +
    metadata_keys.flat_map { |key| normalized_topics(metadata.dig("video_processing", key)) }

  normalized_topics(combined)
end
-
-
# Cleans raw suggestion candidates: collapses internal whitespace, drops
# blanks, truncates to 140 bytes, de-duplicates, and caps at MAX_SUGGESTIONS.
def normalize_suggestions(value)
  Array(value).filter_map do |raw|
    text = raw.to_s.gsub(/\s+/, " ").strip
    next if text.blank?

    # NOTE(review): byteslice may split a multibyte character at the 140-byte
    # boundary, leaving an invalid trailing byte — confirm consumers tolerate it.
    text.byteslice(0, 140)
  end.uniq.first(MAX_SUGGESTIONS)
end
-
-
# Face count for the post: prefers the analysis face_summary count when it is
# positive, otherwise falls back to the face_recognition metadata count.
def extract_face_count(analysis:, metadata:)
  from_summary = analysis.dig("face_summary", "face_count").to_i
  return from_summary if from_summary.positive?

  metadata.dig("face_recognition", "face_count").to_i
end
-
-
# First non-blank OCR text, searched in priority order: analysis OCR, video
# OCR, OCR-analysis metadata, video-processing metadata. nil when none found.
def extract_ocr_text(analysis:, metadata:)
  candidates = [
    analysis["ocr_text"],
    analysis["video_ocr_text"],
    metadata.dig("ocr_analysis", "ocr_text"),
    metadata.dig("video_processing", "ocr_text")
  ]

  candidates.lazy.map { |text| text.to_s.strip }.find(&:present?)
end
-
-
# Transcript text from the analysis, falling back to the video-processing
# metadata; nil when neither holds a non-blank value.
def extract_transcript(analysis:, metadata:)
  direct = analysis["transcript"].to_s.strip
  return direct if direct.present?

  metadata.dig("video_processing", "transcript").to_s.strip.presence
end
-
-
# Combined textual context for the post: OCR text and transcript joined by a
# newline; nil when both are blank.
def extract_text_context(analysis:, metadata:)
  parts = [
    extract_ocr_text(analysis: analysis, metadata: metadata),
    extract_transcript(analysis: analysis, metadata: metadata)
  ]

  cleaned = parts.map { |part| part.to_s.strip }.reject(&:blank?)
  cleaned.join("\n").presence
end
-
-
# Assembles a textual description of the post from, in priority order: the
# analysis image description, a topics-based fallback, the video context
# summary, and a bounded audio-transcript excerpt.
# Returns nil when no signal is available.
def build_image_description(analysis:, metadata:, topics:, transcript:)
  description = analysis["image_description"].to_s.strip
  # No description from analysis — fall back to listing detected topics.
  if description.blank? && topics.any?
    description = "Detected visual signals: #{topics.first(6).join(', ')}."
  end

  video_summary = analysis["video_context_summary"].to_s.strip.presence || metadata.dig("video_processing", "context_summary").to_s.strip.presence
  if description.present? && video_summary.present?
    description = "#{description} #{video_summary}".strip
  elsif description.blank? && video_summary.present?
    description = video_summary
  end

  # Append a whitespace-collapsed, 220-byte transcript excerpt when present.
  if transcript.to_s.present?
    transcript_excerpt = transcript.to_s.gsub(/\s+/, " ").strip.byteslice(0, 220)
    snippet = "Audio transcript: #{transcript_excerpt}."
    description = [ description, snippet ].compact.join(" ").strip
  end

  description.presence
end
-
-
# Structured evidence payload handed to the comment generator. nil-valued
# entries are removed by the trailing #compact.
def build_comment_context_payload(analysis:, metadata:, topics:, transcript:, ocr_text:)
  {
    source: "post_analysis",
    # Media type defaults to "image" when no video semantic route is recorded.
    media_type: analysis["video_semantic_route"].to_s.presence || metadata.dig("video_processing", "semantic_route").to_s.presence || "image",
    objects: topics.first(20),
    hashtags: normalized_topics(analysis["hashtags"]).first(20),
    mentions: normalized_topics(analysis["mentions"]).first(20),
    profile_handles: normalized_topics(analysis["video_profile_handles"]).first(20),
    scenes: Array(analysis["video_scenes"]).select { |row| row.is_a?(Hash) }.first(20),
    ocr_text: ocr_text.to_s.presence,
    transcript: transcript.to_s.presence
  }.compact
end
-
-
# Records a "blocked" comment-generation outcome on the post and returns the
# standard result hash.
#
# @param analysis [Hash, Object] post analysis (non-Hash is treated as {})
# @param metadata [Hash, Object] post metadata (non-Hash is treated as {})
# @param preparation [Hash] history-preparation summary (string keys)
# @param missing_signals [Array] signal names that were absent
# @param reason_code [String, nil] fallback reason code
# @param error_message [String, nil] optional detailed error text
def persist_blocked!(analysis:, metadata:, preparation:, missing_signals:, reason_code:, error_message: nil)
  analysis = normalized_hash(analysis)
  metadata = normalized_hash(metadata)

  missing = Array(missing_signals).map(&:to_s).map(&:strip).reject(&:blank?).uniq
  reason = blocked_reason(preparation: preparation, missing_signals: missing, fallback_reason_code: reason_code)

  # Clear any previous suggestions and mark the generation as blocked.
  analysis["comment_suggestions"] = []
  analysis["comment_generation_status"] = "blocked_missing_required_evidence"
  analysis["comment_generation_source"] = "policy"
  analysis["comment_generation_fallback_used"] = false
  analysis["comment_generation_error"] = error_message.to_s.presence || reason

  metadata["comment_generation_policy"] = {
    "status" => "blocked",
    "required_signals" => REQUIRED_SIGNAL_KEYS,
    "missing_signals" => missing,
    "enforce_required_evidence" => enforce_required_evidence?,
    "history_ready" => ActiveModel::Type::Boolean.new.cast(preparation["ready_for_comment_generation"]),
    "history_reason_code" => preparation["reason_code"].to_s.presence,
    "history_reason" => preparation["reason"].to_s.presence,
    "blocked_reason_code" => reason_code.to_s.presence || "missing_required_evidence",
    "blocked_reason" => reason,
    "updated_at" => Time.current.iso8601(3)
  }.compact

  # Only persist when the post record actually exists in the database.
  post.update!(analysis: analysis, metadata: metadata) if post&.persisted?

  {
    blocked: true,
    status: analysis["comment_generation_status"],
    source: analysis["comment_generation_source"],
    suggestions_count: 0,
    reason_code: reason_code.to_s.presence || "missing_required_evidence",
    history_reason_code: preparation["reason_code"].to_s.presence
  }
end
-
-
# Human-readable summary of why generation was blocked, derived from the
# missing-signal names; falls back to the supplied reason code.
def blocked_reason(preparation:, missing_signals:, fallback_reason_code:)
  reasons = []
  reasons << "history_not_ready(#{preparation['reason_code']})" if missing_signals.include?("history")
  reasons << "face_signal_missing" if missing_signals.include?("face")
  reasons << "text_context_missing(ocr_or_transcript)" if missing_signals.include?("text_context")
  reasons = [ fallback_reason_code.to_s ] if reasons.empty?
  reasons.join(", ")
end
-
-
# Result payload for a run that was skipped by policy before any generation.
def skipped_result(reason_code:)
  base = {
    blocked: true,
    status: "skipped",
    source: "policy"
  }
  base.merge(
    suggestions_count: 0,
    reason_code: reason_code.to_s,
    history_reason_code: nil
  )
end
-
-
# Defensive copy: deep-duplicates Hash input; anything else becomes {}.
def normalized_hash(value)
  return {} unless value.is_a?(Hash)

  value.deep_dup
end
-
-
# Whether required-evidence enforcement is enabled for this run.
# NOTE: returns the raw flag (nil when never set), not a strict boolean.
def enforce_required_evidence?
  @enforce_required_evidence
end
-
end
-
end
-
module Ai
  # Extracts OCR text from raw image bytes via the local microservice,
  # returning a uniform result hash: { skipped:, ocr_text:, ocr_blocks:, metadata: }.
  class PostOcrService
    # @param client [#analyze_image_bytes!] injectable for testing
    def initialize(client: Ai::LocalMicroserviceClient.new)
      @client = client
    end

    # Runs TEXT_DETECTION over the given bytes.
    # Never raises: errors are folded into a skipped result with error details.
    #
    # @param image_bytes [String] raw image data
    # @param usage_context [Hash] passed through for usage accounting
    def extract_from_image_bytes(image_bytes:, usage_context: {})
      return skipped_result(reason: "image_bytes_missing") if image_bytes.blank?

      response = @client.analyze_image_bytes!(
        image_bytes,
        features: [ { type: "TEXT_DETECTION" } ],
        usage_category: "ocr",
        usage_context: usage_context
      )

      # Response shape mirrors the Vision-style "textAnnotations" list.
      rows = Array(response["textAnnotations"])
      blocks = rows.map do |row|
        next unless row.is_a?(Hash)

        text = row["description"].to_s.strip
        next if text.blank?

        {
          "text" => text,
          "confidence" => row["confidence"].to_f,
          "bbox" => normalize_bbox(row.dig("boundingPoly", "vertices")),
          "source" => "ocr"
        }
      end.compact.first(80) # bound the block count

      {
        skipped: false,
        # Concatenated unique block texts, newline-separated; nil when empty.
        ocr_text: blocks.map { |row| row["text"] }.uniq.join("\n").presence,
        ocr_blocks: blocks,
        metadata: {
          source: "local_microservice_ocr",
          block_count: blocks.length
        }
      }
    rescue StandardError => e
      # Degrade to a skipped result carrying the error details.
      {
        skipped: true,
        ocr_text: nil,
        ocr_blocks: [],
        metadata: {
          source: "local_microservice_ocr",
          reason: "ocr_error",
          error_class: e.class.name,
          error_message: e.message.to_s
        }
      }
    end

    private

    # Uniform "skipped" result with the given reason code.
    def skipped_result(reason:)
      {
        skipped: true,
        ocr_text: nil,
        ocr_blocks: [],
        metadata: {
          source: "local_microservice_ocr",
          reason: reason
        }
      }
    end

    # Converts a polygon vertex list into an axis-aligned bounding box
    # { "x1", "y1", "x2", "y2" }; {} when no usable vertices exist.
    def normalize_bbox(vertices)
      points = Array(vertices).map do |row|
        next unless row.is_a?(Hash)

        x = row["x"]
        y = row["y"]
        # Vertices at the image edge may omit a coordinate; skip those.
        next if x.nil? || y.nil?

        [ x.to_f, y.to_f ]
      end.compact
      return {} if points.empty?

      xs = points.map(&:first)
      ys = points.map(&:last)
      {
        "x1" => xs.min,
        "y1" => ys.min,
        "x2" => xs.max,
        "y2" => ys.max
      }
    end
  end
end
-
require "json"
-
-
module Ai
  # Runs an LLM analysis over a structured Instagram profile payload (plus
  # optional image URLs) and returns the parsed JSON report along with the
  # raw response and the exact prompt used.
  class ProfileAnalyzer
    DEFAULT_MODEL = "mistral:7b".freeze

    # @param client [#chat_completions!] injectable for testing
    # @param model [String, nil] LLM model name; falls back to DEFAULT_MODEL
    def initialize(client: nil, model: nil)
      @client = client || Ai::LocalMicroserviceClient.new
      @model = model.presence || DEFAULT_MODEL
    end

    # @param profile_payload [Hash] structured profile data serialized into the prompt
    # @param images [Array<String>] image URLs attached as image_url content parts
    # @return [Hash] { model:, prompt:, response_text:, response_raw:, analysis: }
    #   where :analysis is the parsed JSON, or a parse-error marker hash.
    def analyze!(profile_payload:, images: [])
      system = <<~SYS.strip
        You analyze Instagram profile data and produce a compact JSON report that can be used to draft friendly, respectful messages.

        Safety/constraints:
        - For demographics (age/gender/location), provide cautious estimates only when there is supporting evidence.
        - Use a modern, socially natural Gen Z-style voice for message/comment suggestions:
          light slang, playful phrasing, mild humor, and selective emojis.
        - Keep tone authentic and kind; avoid harassment, sexual content, or manipulative language.
        - Output MUST be strict JSON (no markdown, no commentary).
      SYS

      user_text = <<~TXT
        INPUT_PAYLOAD_JSON:
        #{JSON.pretty_generate(profile_payload)}

        Produce JSON with keys:
        - summary: short 3-6 sentence summary of interests + tone + interaction style
        - languages: array of {language, confidence, evidence}
        - likes: array of strings (topics/content likely liked)
        - dislikes: array of strings (topics/content likely avoided)
        - intent_labels: array of strings from ["friendship","networking","business","flirting","unknown"]
        - conversation_hooks: array of {hook, evidence}
        - personalization_tokens: array of safe, non-sensitive details we can mention
        - no_go_zones: array of topics/styles to avoid
        - writing_style: {tone, formality, emoji_usage, slang_level, evidence}
        - response_style_prediction: one of ["short","medium","long","unknown"]
        - engagement_probability: number 0-1
        - recommended_next_action: one of ["dm","comment","wait","ignore","review"]
        - demographic_estimates: {age, age_confidence, gender, gender_confidence, location, location_confidence, evidence}
        - self_declared: {age, gender, location, pronouns, other}
        - suggested_dm_openers: 5 short openers in friendly Gen Z-style voice (light slang/humor/emojis when natural)
        - suggested_comment_templates: 5 short comment templates in the same voice
        - confidence_notes: short string describing what was/wasn't available
        - why_not_confident: short string listing missing signals that reduced confidence
      TXT

      messages = [
        { role: "system", content: [ { type: "text", text: system } ] },
        { role: "user", content: build_user_content(text: user_text, images: images) }
      ]

      resp = @client.chat_completions!(
        model: @model,
        messages: messages,
        temperature: 0.2, # low temperature for stable, schema-shaped output
        usage_category: "report_generation",
        usage_context: { workflow: "profile_analyzer" }
      )
      parsed = safe_parse_json(resp[:content])

      {
        model: @model,
        prompt: { system: system, user: user_text, images_count: images.length },
        response_text: resp[:content],
        response_raw: resp[:raw],
        analysis: parsed
      }
    end

    private

    # Builds the multimodal user message: text part first, then one
    # image_url part per non-blank image URL.
    def build_user_content(text:, images:)
      out = [ { type: "text", text: text } ]

      Array(images).each do |img|
        url = img.to_s.strip
        next if url.blank?
        out << { type: "image_url", image_url: { url: url } }
      end

      out
    end

    # Parses the model output; on failure returns a marker hash carrying the
    # raw text instead of raising, so callers always get a Hash.
    def safe_parse_json(text)
      JSON.parse(text.to_s)
    rescue StandardError
      { "parse_error" => true, "raw_text" => text.to_s }
    end
  end
end
-
module Ai
  # Derives profile tags from a post-analysis result and syncs them onto the
  # profile's tag set. Pure class-method API; failures are swallowed.
  class ProfileAutoTagger
    # Whitelist of tag names this tagger is allowed to manage.
    TAG_KEYS = %w[personal_user friend female_friend male_friend relative page excluded automatic_reply].freeze

    class << self
      # Merges inferred tags into the profile's existing tags and persists.
      # No-op when profile is missing, analysis is not a Hash, or nothing
      # was inferred. Returns nil on any error (best-effort).
      #
      # NOTE(review): the assignment below replaces the full tag set with only
      # whitelisted names — any existing tag outside TAG_KEYS is dropped.
      # Confirm that is intended.
      def sync_from_post_analysis!(profile:, analysis:)
        return unless profile
        return unless analysis.is_a?(Hash)

        inferred = infer_tags(profile: profile, analysis: analysis)
        return if inferred.empty?

        existing = profile.profile_tags.pluck(:name)
        desired = (existing + inferred).uniq
        tags = desired.filter_map do |name|
          next unless TAG_KEYS.include?(name.to_s)
          ProfileTag.find_or_create_by!(name: name.to_s)
        end
        profile.profile_tags = tags
        profile.save!
      rescue StandardError
        nil
      end

      private

      # Maps the analysis' author_type/relevance/confidence onto tag names.
      def infer_tags(profile:, analysis:)
        tags = []
        author_type = analysis["author_type"].to_s
        relevant = analysis["relevant"]
        confidence = analysis["confidence"].to_f

        case author_type
        when "page"
          tags << "page"
        when "relative"
          tags << "relative"
        when "friend"
          tags << "friend"
        when "personal_user"
          tags << "personal_user"
        end

        # Confidently irrelevant profiles are excluded from engagement.
        tags << "excluded" if relevant == false && confidence >= 0.6

        # Auto-reply only for confidently relevant, messageable profiles.
        if relevant == true && confidence >= 0.65 && profile.can_message == true
          tags << "automatic_reply"
        end

        tags.uniq
      end
    end
  end
end
-
module Ai
  # Prepares (and caches) the profile-level context required before engagement
  # comments may be generated: collects recent posts, ensures they are
  # analyzed, resolves face identities, refreshes the behavior profile, and
  # evaluates overall readiness. The result is a string-keyed summary hash
  # persisted on the profile's behavior-profile metadata.
  class ProfileCommentPreparationService
    DEFAULT_POSTS_LIMIT = 10
    DEFAULT_COMMENTS_LIMIT = 12
    MAX_POSTS_LIMIT = 20
    # Minimum analyzed posts required for grounded generation (capped by the
    # number of posts actually available — see build_readiness).
    MIN_REQUIRED_ANALYZED_POSTS = 3
    CACHE_TTL = 30.minutes
    # Bump this to invalidate previously cached summaries.
    PREPARATION_VERSION = "profile_comment_preparation_v1".freeze

    # All collaborators are injectable for testing; limits are clamped.
    def initialize(
      account:,
      profile:,
      posts_limit: DEFAULT_POSTS_LIMIT,
      comments_limit: DEFAULT_COMMENTS_LIMIT,
      analyze_missing_posts: true,
      collector: nil,
      post_analyzer: nil,
      user_profile_builder_service: UserProfileBuilderService.new,
      face_identity_resolution_service: FaceIdentityResolutionService.new
    )
      @account = account
      @profile = profile
      @posts_limit = posts_limit.to_i.clamp(1, MAX_POSTS_LIMIT)
      @comments_limit = comments_limit.to_i.clamp(1, 20)
      @analyze_missing_posts = ActiveModel::Type::Boolean.new.cast(analyze_missing_posts)
      @collector = collector
      @post_analyzer = post_analyzer
      @user_profile_builder_service = user_profile_builder_service
      @face_identity_resolution_service = face_identity_resolution_service
    end

    # Runs the full preparation pipeline, or returns the cached summary when
    # it is still fresh (unless force: true). Never raises: failures produce
    # a "not ready" summary which is also persisted.
    def prepare!(force: false)
      cached = read_cached_summary
      if !force && cache_valid?(cached)
        return cached.merge(
          "from_cache" => true,
          "ready_for_comment_generation" => ActiveModel::Type::Boolean.new.cast(cached["ready_for_comment_generation"])
        )
      end

      collected_posts = collect_recent_posts
      recent_posts = load_recent_posts(collected_posts: collected_posts)
      analysis = analyze_recent_posts!(recent_posts: recent_posts)
      resolve_identities_for_recent_posts!(recent_posts: recent_posts)
      @user_profile_builder_service.refresh!(profile: @profile)

      identity_consistency = build_identity_consistency
      readiness = build_readiness(analysis: analysis, identity_consistency: identity_consistency, recent_posts_count: recent_posts.length)

      summary = {
        "version" => PREPARATION_VERSION,
        "prepared_at" => Time.current.iso8601,
        "profile_id" => @profile.id,
        "instagram_account_id" => @account.id,
        "posts_limit" => @posts_limit,
        "comments_limit" => @comments_limit,
        "recent_posts_count" => recent_posts.length,
        "analysis" => analysis,
        "identity_consistency" => identity_consistency,
        "ready_for_comment_generation" => readiness[:ready],
        "reason_code" => readiness[:reason_code],
        "reason" => readiness[:reason]
      }

      persist_summary(summary)
      summary
    rescue StandardError => e
      # Persist a failure summary so downstream policy checks see "not ready".
      summary = {
        "version" => PREPARATION_VERSION,
        "prepared_at" => Time.current.iso8601,
        "profile_id" => @profile&.id,
        "instagram_account_id" => @account&.id,
        "ready_for_comment_generation" => false,
        "reason_code" => "profile_preparation_failed",
        "reason" => e.message.to_s,
        "error_class" => e.class.name
      }
      persist_summary(summary)
      summary
    end

    private

    # Fetches and persists the profile's recent posts via the collector.
    # Returns [] on any error (the pipeline then falls back to stored posts).
    def collect_recent_posts
      collector = @collector || Instagram::ProfileAnalysisCollector.new(account: @account, profile: @profile)
      result = collector.collect_and_persist!(posts_limit: @posts_limit, comments_limit: @comments_limit)
      Array(result[:posts]).compact
    rescue StandardError
      []
    end

    # Persisted collected posts, or stored posts when collection yielded none;
    # sorted newest-first (taken_at, then id) and capped at @posts_limit.
    def load_recent_posts(collected_posts:)
      rows = Array(collected_posts).select(&:persisted?)
      if rows.empty?
        rows = @profile.instagram_profile_posts.recent_first.limit(@posts_limit).to_a
      end
      rows.sort_by { |post| [ post.taken_at || Time.at(0), post.id.to_i ] }.reverse.first(@posts_limit)
    end

    # Ensures each recent post is analyzed (optionally triggering analysis),
    # runs face recognition, and tallies analyzed/pending/failed counts plus
    # how many posts carry structured signals.
    def analyze_recent_posts!(recent_posts:)
      analyzer = @post_analyzer || method(:analyze_post!)
      analyzed = 0
      pending = 0
      failed = []
      structured_signals = 0

      recent_posts.each do |post|
        begin
          if !post_analyzed?(post)
            if @analyze_missing_posts
              analyzer.call(post)
              post.reload # pick up analysis written by the job
            else
              pending += 1
              next
            end
          end

          if post_analyzed?(post)
            analyzed += 1
            ensure_post_face_recognition!(post: post)
            structured_signals += 1 if post_has_structured_signals?(post)
          else
            pending += 1
          end
        rescue StandardError => e
          failed << {
            "post_id" => post.id,
            "shortcode" => post.shortcode,
            "error" => e.message.to_s
          }
        end
      end

      {
        "analyzed_posts_count" => analyzed,
        "pending_posts_count" => pending,
        "failed_posts_count" => failed.length,
        "failed_posts" => failed.first(12),
        "posts_with_structured_signals_count" => structured_signals,
        "latest_posts_analyzed" => (pending.zero? && failed.empty?)
      }
    end

    # Runs the post-analysis job inline, without comment generation.
    def analyze_post!(post)
      AnalyzeInstagramProfilePostJob.perform_now(
        instagram_account_id: @account.id,
        instagram_profile_id: @profile.id,
        instagram_profile_post_id: post.id,
        pipeline_mode: "inline",
        task_flags: {
          generate_comments: false
        }
      )
    end

    # A post counts as analyzed only with both status and timestamp set.
    def post_analyzed?(post)
      post.ai_status.to_s == "analyzed" && post.analyzed_at.present?
    end

    # Runs face recognition once per image post; best-effort (errors ignored).
    def ensure_post_face_recognition!(post:)
      return unless post.media.attached?
      return unless post.media.blob&.content_type.to_s.start_with?("image/")
      return if post.instagram_post_faces.exists?

      PostFaceRecognitionService.new.process!(post: post)
    rescue StandardError
      nil
    end

    # True when the analysis carries any usable structured signal.
    def post_has_structured_signals?(post)
      analysis = post.analysis.is_a?(Hash) ? post.analysis : {}
      image_description = analysis["image_description"].to_s
      topics = Array(analysis["topics"])
      suggestions = Array(analysis["comment_suggestions"])
      entities = analysis["entities"].is_a?(Hash) ? analysis["entities"] : {}

      image_description.present? || topics.any? || suggestions.any? || entities.any?
    end

    # Resolves face identities for each post that has detected faces;
    # per-post failures skip to the next post.
    def resolve_identities_for_recent_posts!(recent_posts:)
      recent_posts.each do |post|
        next unless post.instagram_post_faces.exists?

        @face_identity_resolution_service.resolve_for_post!(
          post: post,
          extracted_usernames: extracted_usernames_for_post(post),
          content_summary: post.analysis.is_a?(Hash) ? post.analysis : {}
        )
      rescue StandardError
        next
      end
    end

    # Username candidates for identity resolution: analysis mentions/handles
    # plus @-handles scanned from OCR text and the caption. Capped at 20.
    def extracted_usernames_for_post(post)
      analysis = post.analysis.is_a?(Hash) ? post.analysis : {}
      rows = []
      rows.concat(Array(analysis["mentions"]))
      rows.concat(Array(analysis["profile_handles"]))
      rows.concat(analysis["ocr_text"].to_s.scan(/@[a-zA-Z0-9._]{2,30}/))
      rows.concat(post.caption.to_s.scan(/@[a-zA-Z0-9._]{2,30}/))
      rows.map(&:to_s).map(&:strip).reject(&:blank?).uniq.first(20)
    end

    # Checks that one recognized person dominates the profile's posts and is
    # linked to the profile itself (by linked username, label, or role).
    # Returns a string-keyed report hash; never raises.
    def build_identity_consistency
      counts = InstagramPostFace.joins(:instagram_profile_post)
        .where(instagram_profile_posts: { instagram_profile_id: @profile.id })
        .where.not(instagram_story_person_id: nil)
        .group(:instagram_story_person_id)
        .count

      total_faces = counts.values.sum.to_i
      return {
        "consistent" => false,
        "reason_code" => "insufficient_face_data",
        "reason" => "No recognized faces found across analyzed posts.",
        "total_faces" => total_faces
      } if total_faces <= 0

      # Most frequently appearing person is the primary-identity candidate.
      person_id, appearances = counts.max_by { |_id, value| value.to_i }
      appearances = appearances.to_i
      dominance_ratio = (appearances.to_f / total_faces.to_f).round(4)
      min_primary_appearances = FaceIdentityResolutionService::MIN_PRIMARY_APPEARANCES
      min_primary_ratio = FaceIdentityResolutionService::MIN_PRIMARY_RATIO

      person = @profile.instagram_story_people.find_by(id: person_id)
      linked_usernames = Array(person&.metadata&.dig("linked_usernames")).map { |value| normalize_username(value) }.reject(&:blank?)
      profile_username = normalize_username(@profile.username)
      label_username = normalize_username(person&.label)

      account_owner_match = linked_usernames.include?(profile_username) ||
        label_username == profile_username ||
        person&.role.to_s == "primary_user"

      consistent = appearances >= min_primary_appearances &&
        dominance_ratio >= min_primary_ratio &&
        account_owner_match

      # First failing requirement (in priority order) determines the code.
      reason_code =
        if !account_owner_match
          "primary_identity_not_linked_to_profile"
        elsif appearances < min_primary_appearances
          "insufficient_primary_appearances"
        elsif dominance_ratio < min_primary_ratio
          "identity_majority_not_confirmed"
        else
          "identity_consistent"
        end

      reason =
        if consistent
          "Primary identity is consistent across recent analyzed posts."
        else
          "Primary identity consistency requirements were not met (#{reason_code})."
        end

      {
        "consistent" => consistent,
        "reason_code" => reason_code,
        "reason" => reason,
        "primary_person_id" => person_id,
        "primary_role" => person&.role.to_s.presence,
        "appearance_count" => appearances,
        "total_faces" => total_faces,
        "dominance_ratio" => dominance_ratio,
        "linked_usernames" => linked_usernames.first(10)
      }.compact
    rescue StandardError => e
      {
        "consistent" => false,
        "reason_code" => "identity_consistency_error",
        "reason" => e.message.to_s,
        "error_class" => e.class.name
      }
    end

    # Combines post-analysis stats and identity consistency into the final
    # readiness verdict { ready:, reason_code:, reason: }.
    def build_readiness(analysis:, identity_consistency:, recent_posts_count:)
      analysis_data = analysis.is_a?(Hash) ? analysis : {}
      identity_data = identity_consistency.is_a?(Hash) ? identity_consistency : {}

      analyzed_posts_count = analysis_data["analyzed_posts_count"].to_i
      structured_signals_count = analysis_data["posts_with_structured_signals_count"].to_i
      latest_posts_analyzed = ActiveModel::Type::Boolean.new.cast(analysis_data["latest_posts_analyzed"])
      identity_consistent = ActiveModel::Type::Boolean.new.cast(identity_data["consistent"])
      # Profiles with fewer posts than the minimum only need all of them analyzed.
      required_analyzed = [ recent_posts_count.to_i, MIN_REQUIRED_ANALYZED_POSTS ].min

      if recent_posts_count.to_i <= 0
        return {
          ready: false,
          reason_code: "no_recent_posts_available",
          reason: "No recent posts are available to build verified profile context."
        }
      end
      unless latest_posts_analyzed
        return {
          ready: false,
          reason_code: "latest_posts_not_analyzed",
          reason: "Latest posts have not been fully analyzed yet."
        }
      end
      if analyzed_posts_count < required_analyzed
        return {
          ready: false,
          reason_code: "insufficient_analyzed_posts",
          reason: "Insufficient analyzed posts for reliable historical context."
        }
      end
      if structured_signals_count <= 0
        return {
          ready: false,
          reason_code: "missing_structured_post_signals",
          reason: "Recent posts do not contain enough structured metadata for grounded comments."
        }
      end
      unless identity_consistent
        return {
          ready: false,
          reason_code: identity_data["reason_code"].to_s.presence || "identity_consistency_not_confirmed",
          reason: identity_data["reason"].to_s.presence || "Identity consistency could not be confirmed."
        }
      end

      {
        ready: true,
        reason_code: "profile_context_ready",
        reason: "Profile history, latest post analysis, and identity consistency verified."
      }
    end

    # Cached summary stored on the behavior profile's metadata; {} when absent.
    def read_cached_summary
      metadata = @profile.instagram_profile_behavior_profile&.metadata
      return {} unless metadata.is_a?(Hash)

      summary = metadata["comment_generation_preparation"]
      summary.is_a?(Hash) ? summary : {}
    end

    # A cached summary is valid when its version matches and it is younger
    # than CACHE_TTL.
    def cache_valid?(summary)
      prepared_at = parse_time(summary["prepared_at"])
      return false unless prepared_at
      return false if summary["version"].to_s != PREPARATION_VERSION

      prepared_at >= CACHE_TTL.ago
    end

    # Writes the summary under metadata["comment_generation_preparation"] on
    # the (created-if-missing) behavior profile; best-effort, errors ignored.
    def persist_summary(summary)
      record = InstagramProfileBehaviorProfile.find_or_initialize_by(instagram_profile: @profile)
      metadata = record.metadata.is_a?(Hash) ? record.metadata.deep_dup : {}
      metadata["comment_generation_preparation"] = summary
      record.metadata = metadata
      # Satisfy presence/shape expectations for new records.
      record.activity_score = record.activity_score.to_f
      record.behavioral_summary = {} unless record.behavioral_summary.is_a?(Hash)
      record.save!
    rescue StandardError
      nil
    end

    # Lenient timestamp parsing; nil for blank or unparsable input.
    def parse_time(value)
      return nil if value.to_s.blank?

      Time.zone.parse(value.to_s)
    rescue StandardError
      nil
    end

    # Lowercases, strips, and removes a leading "@"; nil when empty.
    def normalize_username(value)
      text = value.to_s.strip.downcase
      text = text.delete_prefix("@")
      text.presence
    end
  end
end
-
require "json"
-
-
module Ai
-
class ProfileDemographicsAggregator
-
DEFAULT_MODEL = "mistral:7b".freeze
-
-
# @param account [Object] account context (currently stored for later use)
# @param model [String, nil] LLM model name; defaults to DEFAULT_MODEL
def initialize(account:, model: nil)
  @account = account
  @model = model.to_s.presence || DEFAULT_MODEL
end
-
-
# Consolidates accumulated demographic analyses into one inference.
# Tries the LLM aggregator first; falls back to a heuristic consolidation
# when the LLM is unavailable, returns a bad shape, or raises.
# Always returns a Hash with :ok, :source, :profile_inference, :post_inferences.
def aggregate!(dataset:)
  response = call_aggregator_llm(dataset: dataset)
  normalized = normalize_result(response)
  return normalized if normalized[:ok]

  heuristic_fallback(dataset: dataset, error: normalized[:error])
rescue StandardError => e
  heuristic_fallback(dataset: dataset, error: e.message)
end
-
-
private
-
-
# Sends the aggregation prompt to the local LLM and returns the parsed JSON
# Hash, or nil when no client is available or the response is not a Hash.
def call_aggregator_llm(dataset:)
  client = local_client
  return nil unless client

  prompt = build_prompt(dataset: dataset)
  resp = client.generate_text_json!(
    model: @model,
    prompt: prompt,
    temperature: 0.1, # low temperature for stable structured output
    max_output_tokens: 1600,
    usage_category: "report_generation",
    usage_context: { workflow: "profile_demographics_aggregator" }
  )

  resp[:json].is_a?(Hash) ? resp[:json] : nil
end
-
-
# Builds a fresh local microservice client per call.
# NOTE(review): call_aggregator_llm guards against a nil client, which this
# implementation never returns — confirm whether nil was ever possible.
def local_client
  Ai::LocalMicroserviceClient.new
end
-
-
# Renders the aggregation prompt: task instructions, the strict output JSON
# schema, and the serialized input dataset. The heredoc content is runtime
# text sent to the model and must not be altered casually.
def build_prompt(dataset:)
  <<~PROMPT
    You are an AI aggregation engine that consolidates structured JSON analyses over time.

    Task:
    - Combine profile-level and post-level analysis JSON.
    - Infer missing demographics cautiously: age, gender, location.
    - Prefer explicit self-declared evidence over weak assumptions.
    - Confidence must be 0.0 to 1.0.
    - If evidence is weak, return null with low confidence.

    Output STRICT JSON only with this schema:
    {
      "profile_inference": {
        "age": 0,
        "age_range": "",
        "age_confidence": 0.0,
        "gender": "",
        "gender_indicators": [],
        "gender_confidence": 0.0,
        "location": "",
        "location_signals": [],
        "location_confidence": 0.0,
        "evidence": "",
        "why": ""
      },
      "post_inferences": [
        {
          "shortcode": "",
          "source_type": "",
          "source_ref": "",
          "age": 0,
          "gender": "",
          "location": "",
          "confidence": 0.0,
          "evidence": "",
          "relevant": true
        }
      ]
    }

    INPUT_DATASET_JSON:
    #{JSON.pretty_generate(dataset)}
  PROMPT
end
-
-
# Validates and coerces the raw LLM response into the internal result shape.
# Returns { ok: false, error: ... } when the response is not a Hash; otherwise
# { ok: true, source:, profile_inference:, post_inferences: } with all values
# cleaned and bounded.
def normalize_result(raw)
  return { ok: false, error: "aggregator_response_blank" } unless raw.is_a?(Hash)

  profile_raw = raw["profile_inference"].is_a?(Hash) ? raw["profile_inference"] : {}
  post_raw = Array(raw["post_inferences"]).select { |entry| entry.is_a?(Hash) }

  profile_inference = {
    age: integer_or_nil(profile_raw["age"]),
    age_range: clean_text(profile_raw["age_range"]),
    age_confidence: float_or_nil(profile_raw["age_confidence"]),
    gender: clean_text(profile_raw["gender"]),
    gender_indicators: Array(profile_raw["gender_indicators"]).map { |v| clean_text(v) }.compact.first(6),
    gender_confidence: float_or_nil(profile_raw["gender_confidence"]),
    location: clean_text(profile_raw["location"]),
    location_signals: Array(profile_raw["location_signals"]).map { |v| clean_text(v) }.compact.first(8),
    location_confidence: float_or_nil(profile_raw["location_confidence"]),
    evidence: clean_text(profile_raw["evidence"]),
    why: clean_text(profile_raw["why"])
  }

  # Post-level rows without a shortcode cannot be attributed — drop them.
  post_inferences = post_raw.filter_map do |entry|
    shortcode = clean_text(entry["shortcode"])
    next if shortcode.blank?

    {
      shortcode: shortcode,
      source_type: clean_text(entry["source_type"]),
      source_ref: clean_text(entry["source_ref"]),
      age: integer_or_nil(entry["age"]),
      gender: clean_text(entry["gender"]),
      location: clean_text(entry["location"]),
      confidence: float_or_nil(entry["confidence"]),
      evidence: clean_text(entry["evidence"]),
      relevant: ActiveModel::Type::Boolean.new.cast(entry["relevant"])
    }
  end

  {
    ok: true,
    source: "json_aggregator_llm",
    profile_inference: profile_inference,
    post_inferences: post_inferences
  }
end
-
-
# Consolidates demographics without an LLM: pools profile- and post-level
# demographic rows from the accumulated dataset and derives a profile
# inference via simple statistics (median age, modal gender/location).
# `error` (if given) records why the LLM path was abandoned.
def heuristic_fallback(dataset:, error: nil)
  profile_rows = Array(dataset.dig(:analysis_pool, :profile_demographics))
  post_rows = Array(dataset.dig(:analysis_pool, :post_demographics))

  ages = []
  genders = []
  locations = []
  # Profile rows are pooled before post rows; keys may be strings or symbols.
  [profile_rows, post_rows].each do |rows|
    ages.concat(rows.filter_map { |row| integer_or_nil(row["age"] || row[:age]) })
    genders.concat(rows.map { |row| clean_text(row["gender"] || row[:gender]) }.reject(&:blank?))
    locations.concat(rows.map { |row| clean_text(row["location"] || row[:location]) }.reject(&:blank?))
  end

  profile_inference = {
    age: median(ages),
    age_range: ages.any? ? "#{ages.min}-#{ages.max}" : nil,
    age_confidence: confidence_from_count(ages.length),
    gender: mode(genders),
    # Most frequent values first; tally preserves first-occurrence ordering.
    gender_indicators: genders.tally.sort_by { |_value, count| -count }.first(4).map(&:first),
    gender_confidence: confidence_from_count(genders.length),
    location: mode(locations),
    location_signals: locations.tally.sort_by { |_value, count| -count }.first(5).map(&:first),
    location_confidence: confidence_from_count(locations.length),
    evidence: "Heuristic consolidation from accumulated analysis JSON.",
    why: error.to_s.presence
  }

  {
    ok: true,
    source: "heuristic_fallback",
    profile_inference: profile_inference,
    post_inferences: [],
    error: error.to_s.presence
  }
end
-
-
# Coerces to Integer, returning nil for blank or unparseable input.
def integer_or_nil(raw)
  return nil if raw.blank?

  Integer(raw)
rescue StandardError
  nil
end

# Coerces to Float clamped into [0.0, 1.0]; nil for blank/unparseable input.
def float_or_nil(raw)
  return nil if raw.blank?

  Float(raw).clamp(0.0, 1.0)
rescue StandardError
  nil
end

# Strips surrounding whitespace; returns nil when nothing meaningful remains.
def clean_text(raw)
  raw.to_s.strip.presence
end

# Most frequent non-blank value, or nil when the pool is empty.
# Ties resolve to the first-seen value (tally keeps insertion order).
def mode(values)
  pool = Array(values).reject(&:blank?)
  return nil if pool.empty?

  pool.tally.max_by { |_value, count| count }&.first
end

# Median of the non-nil values; the even-length midpoint is rounded to the
# nearest integer. Returns nil for an empty pool.
def median(values)
  sorted = Array(values).compact.sort
  return nil if sorted.empty?

  mid = sorted.length / 2
  return sorted[mid] if sorted.length.odd?

  ((sorted[mid - 1] + sorted[mid]) / 2.0).round
end

# Maps a sample count to a confidence score: 0.35 for one sample, +0.1 per
# additional sample, capped at 0.8. Nil when there are no samples.
def confidence_from_count(count)
  samples = count.to_i
  return nil if samples <= 0

  [0.25 + (samples * 0.1), 0.8].min.round(2)
end
-
end
-
end
-
1
module Ai
-
1
class ProfileHistoryBuildService
-
1
TARGET_ANALYZED_POSTS = 20
-
1
TARGET_CAPTURED_POSTS = 50
-
1
COLLECTION_COMMENTS_LIMIT = 20
-
1
FACE_RECENCY_REFRESH_DAYS = 7
-
1
FACE_REFRESH_MAX_ENQUEUE_PER_RUN = ENV.fetch("PROFILE_HISTORY_FACE_REFRESH_MAX_ENQUEUE_PER_RUN", "6").to_i.clamp(1, 20)
-
1
FACE_REFRESH_PENDING_WINDOW_HOURS = ENV.fetch("PROFILE_HISTORY_FACE_REFRESH_PENDING_WINDOW_HOURS", "6").to_i.clamp(1, 24)
-
1
FACE_VERIFICATION_MIN_APPEARANCES = FaceIdentityResolutionService::MIN_PRIMARY_APPEARANCES
-
1
FACE_VERIFICATION_MIN_RATIO = FaceIdentityResolutionService::MIN_PRIMARY_RATIO
-
-
PROFILE_INCOMPLETE_REASON_CODES =
-
1
then: 1
if defined?(ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES)
-
1
ProcessPostMetadataTaggingJob::PROFILE_INCOMPLETE_REASON_CODES
-
else: 0
else
-
%w[
-
latest_posts_not_analyzed
-
insufficient_analyzed_posts
-
no_recent_posts_available
-
missing_structured_post_signals
-
profile_preparation_failed
-
profile_preparation_error
-
].freeze
-
end
-
-
1
# @param account  [InstagramAccount] acting account
# @param profile  [InstagramProfile] profile whose history is being built
# @param collector override for the post collector (tests / reuse); when nil
#   a fresh Instagram::ProfileAnalysisCollector is built per run
# @param face_identity_resolution_service injectable resolver dependency
def initialize(
  account:,
  profile:,
  collector: nil,
  face_identity_resolution_service: FaceIdentityResolutionService.new
)
  @account = account
  @profile = profile
  @collector = collector
  @face_identity_resolution_service = face_identity_resolution_service
end
-
-
1
# Runs one history-build pass: enforces the scan policy, collects posts,
# queues missing downloads/analyses/face refreshes, verifies face identity,
# and persists the consolidated state on the behavior profile.
# Always returns a result hash (errors are captured, never raised).
def execute!
  policy_decision = Instagram::ProfileScanPolicy.new(profile: @profile).decision
  if ActiveModel::Type::Boolean.new.cast(policy_decision[:skip_post_analysis])
    return persist_and_result!(
      status: "blocked",
      ready: false,
      reason_code: policy_decision[:reason_code].to_s.presence || "profile_scan_policy_blocked",
      reason: policy_decision[:reason].to_s.presence || "Profile is blocked by scan policy.",
      checks: default_checks,
      queue_state: default_queue_state,
      preparation: {},
      face_verification: default_face_verification,
      conversation: default_conversation_state(ready: false)
    )
  end

  collection = collect_posts
  latest_50_posts = active_posts_scope.recent_first.limit(TARGET_CAPTURED_POSTS).to_a
  # FIX: the top-20 is a prefix of the top-50 (same scope, same ordering) —
  # slice the in-memory array instead of issuing a second, racy DB query.
  latest_20_posts = latest_50_posts.first(TARGET_ANALYZED_POSTS)

  checks = build_capture_checks(collection: collection, latest_50_posts: latest_50_posts, latest_20_posts: latest_20_posts)
  download_queue = queue_missing_media_downloads(posts: latest_50_posts)
  analysis_queue = queue_missing_post_analysis(posts: latest_20_posts)
  preparation = prepare_history_summary(latest_20_posts: latest_20_posts)
  face_verification = verify_face_identity(latest_posts: latest_50_posts)
  queue_state = build_queue_state(
    download_queue: download_queue,
    analysis_queue: analysis_queue,
    face_refresh_queue: face_refresh_queue_state(face_verification: face_verification)
  )
  # Any queued/pending/deferred work means the build is not yet settled.
  queue_work_pending = queue_state["downloads_queued"].to_i.positive? ||
    queue_state["downloads_pending"].to_i.positive? ||
    queue_state["analyses_queued"].to_i.positive? ||
    queue_state["analyses_pending"].to_i.positive? ||
    queue_state["face_refresh_queued"].to_i.positive? ||
    queue_state["face_refresh_pending"].to_i.positive? ||
    queue_state["face_refresh_deferred"].to_i.positive?
  preparation_ready = ActiveModel::Type::Boolean.new.cast(preparation["ready_for_comment_generation"])
  face_ready = ActiveModel::Type::Boolean.new.cast(face_verification["confirmed"])

  history_ready = checks.values.all? { |row| ActiveModel::Type::Boolean.new.cast(row["ready"]) } &&
    !queue_work_pending &&
    preparation_ready &&
    face_ready

  reason_code, reason = resolve_reason(
    checks: checks,
    queue_state: queue_state,
    preparation: preparation,
    face_verification: face_verification,
    history_ready: history_ready
  )
  conversation = build_conversation_state(ready: history_ready)

  persist_and_result!(
    status: history_ready ? "ready" : "pending",
    ready: history_ready,
    reason_code: reason_code,
    reason: reason,
    checks: checks,
    queue_state: queue_state,
    preparation: preparation,
    face_verification: face_verification,
    conversation: conversation
  )
rescue StandardError => e
  # Never raise out of a build pass; persist a pending state with the error.
  persist_and_result!(
    status: "pending",
    ready: false,
    reason_code: "history_build_failed",
    reason: e.message.to_s,
    checks: default_checks,
    queue_state: default_queue_state,
    preparation: {
      "ready_for_comment_generation" => false,
      "reason_code" => "profile_preparation_error",
      "reason" => e.message.to_s
    },
    face_verification: default_face_verification,
    conversation: default_conversation_state(ready: false)
  )
end
-
-
1
private

# Syncs the profile's posts via the injected (or a fresh) collector without
# downloading media. A collection failure is downgraded to a summary hash
# carrying `collection_error`, which build_capture_checks surfaces later.
def collect_posts
  active_collector = @collector || Instagram::ProfileAnalysisCollector.new(account: @account, profile: @profile)
  active_collector.collect_and_persist!(
    posts_limit: nil,
    comments_limit: COLLECTION_COMMENTS_LIMIT,
    track_missing_as_deleted: true,
    sync_source: "profile_history_build",
    download_media: false
  )
rescue StandardError => e
  {
    summary: {
      feed_fetch: {},
      collection_error: "#{e.class}: #{e.message}"
    }
  }
end
-
-
1
# Derives the three capture/analysis readiness checks from the collection
# summary and the two recent-post windows. Summary keys may arrive with
# either symbol or string keys, so both spellings are probed.
def build_capture_checks(collection:, latest_50_posts:, latest_20_posts:)
  summary = collection.is_a?(Hash) ? collection[:summary] : {}
  summary = {} unless summary.is_a?(Hash)
  feed_fetch = summary[:feed_fetch].is_a?(Hash) ? summary[:feed_fetch] : {}
  feed_fetch = summary["feed_fetch"] if feed_fetch.blank? && summary["feed_fetch"].is_a?(Hash)
  feed_fetch ||= {}

  more_available = ActiveModel::Type::Boolean.new.cast(feed_fetch["more_available"] || feed_fetch[:more_available])
  collection_error = summary[:collection_error].to_s.presence || summary["collection_error"].to_s.presence
  feed_available = feed_fetch.present?
  # Fully captured means the feed responded, reported no further pages, and
  # the collection itself did not error.
  all_posts_captured = feed_available && !more_available && collection_error.blank?

  active_count = active_posts_scope.count

  # "Latest 50" check — expected count is capped by how many posts exist.
  expected_50 = [active_count, TARGET_CAPTURED_POSTS].min
  latest_50_ready = expected_50.positive? && latest_50_posts.length >= expected_50
  latest_50_reason_code =
    if expected_50.zero?
      "no_recent_posts_available"
    elsif latest_50_ready
      "ok"
    else
      "latest_50_posts_not_captured"
    end

  # "Latest 20 analyzed" check.
  expected_20 = [active_count, TARGET_ANALYZED_POSTS].min
  analyzed_recent_20 = latest_20_posts.count { |post| post_analyzed?(post) }
  latest_20_ready = expected_20.positive? && analyzed_recent_20 >= expected_20
  latest_20_reason_code =
    if expected_20.zero?
      "no_recent_posts_available"
    elsif latest_20_ready
      "ok"
    else
      "latest_posts_not_analyzed"
    end

  {
    "all_posts_captured" => {
      "ready" => all_posts_captured,
      "reason_code" => all_posts_captured ? "ok" : "all_posts_not_yet_captured",
      "captured_posts_count" => active_count,
      "more_available" => more_available,
      "source" => feed_fetch["source"] || feed_fetch[:source],
      "pages_fetched" => feed_fetch["pages_fetched"] || feed_fetch[:pages_fetched],
      "feed_available" => feed_available,
      "collection_error" => collection_error
    }.compact,
    "latest_50_captured" => {
      "ready" => latest_50_ready,
      "reason_code" => latest_50_reason_code,
      "expected_count" => expected_50,
      "captured_count" => latest_50_posts.length
    },
    "latest_20_analyzed" => {
      "ready" => latest_20_ready,
      "reason_code" => latest_20_reason_code,
      "expected_count" => expected_20,
      "analyzed_count" => analyzed_recent_20
    }
  }
end
-
-
1
# Enqueues media-download jobs for posts missing attachments. Posts without
# a usable media URL are skipped; posts with a recent in-flight download are
# counted as pending. Per-post failures are captured, never raised.
def queue_missing_media_downloads(posts:)
  queued = 0
  pending = 0
  skipped = 0
  failures = []
  queued_ids = []

  Array(posts).each do |post|
    next unless post
    next if post.media.attached?

    unless downloadable_post?(post)
      skipped += 1
      next
    end

    if media_download_in_flight?(post)
      pending += 1
      next
    end

    job = DownloadInstagramProfilePostMediaJob.perform_later(
      instagram_account_id: @account.id,
      instagram_profile_id: @profile.id,
      instagram_profile_post_id: post.id,
      trigger_analysis: true
    )
    queued += 1
    queued_ids << post.id
    # Record the job on the post so later passes can detect in-flight work.
    mark_history_build_metadata!(
      post: post,
      attributes: {
        "media_download_job_id" => job.job_id,
        "media_download_queued_at" => Time.current.iso8601(3)
      }
    )
  rescue StandardError => e
    failures << {
      "instagram_profile_post_id" => post&.id,
      "shortcode" => post&.shortcode.to_s.presence,
      "error_class" => e.class.name,
      "error_message" => e.message.to_s.byteslice(0, 220)
    }.compact
  end

  {
    queued_count: queued,
    pending_count: pending,
    skipped_count: skipped,
    queued_post_ids: queued_ids,
    failures: failures
  }
end
-
-
1
# Enqueues analysis jobs for posts not yet analyzed. Posts without attached
# media, or with an analysis already in flight, are counted as pending.
# Per-post failures are captured, never raised.
def queue_missing_post_analysis(posts:)
  queued = 0
  pending = 0
  skipped = 0
  failures = []
  queued_ids = []

  Array(posts).each do |post|
    next unless post

    if post_analyzed?(post)
      skipped += 1
      next
    end
    # Media must exist before analysis can run.
    unless post.media.attached?
      pending += 1
      next
    end
    if post_analysis_in_flight?(post)
      pending += 1
      next
    end

    job = AnalyzeInstagramProfilePostJob.perform_later(
      instagram_account_id: @account.id,
      instagram_profile_id: @profile.id,
      instagram_profile_post_id: post.id,
      task_flags: {
        generate_comments: false,
        enforce_comment_evidence_policy: false,
        retry_on_incomplete_profile: false
      }
    )
    queued += 1
    queued_ids << post.id
    mark_history_build_metadata!(
      post: post,
      attributes: {
        "post_analysis_job_id" => job.job_id,
        "post_analysis_queued_at" => Time.current.iso8601(3)
      }
    )
  rescue StandardError => e
    failures << {
      "instagram_profile_post_id" => post&.id,
      "shortcode" => post&.shortcode.to_s.presence,
      "error_class" => e.class.name,
      "error_message" => e.message.to_s.byteslice(0, 220)
    }.compact
  end

  {
    queued_count: queued,
    pending_count: pending,
    skipped_count: skipped,
    queued_post_ids: queued_ids,
    failures: failures
  }
end
-
-
1
# Flattens the three per-queue summaries into the persisted, string-keyed
# queue-state hash. Failure lists are capped at 20 entries each.
def build_queue_state(download_queue:, analysis_queue:, face_refresh_queue: {})
  {
    "downloads_queued" => download_queue[:queued_count].to_i,
    "downloads_pending" => download_queue[:pending_count].to_i,
    "downloads_skipped" => download_queue[:skipped_count].to_i,
    # NOTE(review): despite its name, "analysis_failures" carries the
    # *download* queue failures (analysis failures live under
    # "analysis_queue_failures"). Renaming would break persisted-state
    # readers, so the historical key is preserved — confirm before changing.
    "analysis_failures" => Array(download_queue[:failures]).first(20),
    "analyses_queued" => analysis_queue[:queued_count].to_i,
    "analyses_pending" => analysis_queue[:pending_count].to_i,
    "analyses_skipped" => analysis_queue[:skipped_count].to_i,
    "analysis_queue_failures" => Array(analysis_queue[:failures]).first(20),
    "face_refresh_queued" => face_refresh_queue[:queued_count].to_i,
    "face_refresh_pending" => face_refresh_queue[:pending_count].to_i,
    "face_refresh_deferred" => face_refresh_queue[:deferred_count].to_i,
    "face_refresh_failures" => Array(face_refresh_queue[:failures]).first(20)
  }
end
-
-
1
# Runs the comment-preparation service against the already-loaded recent
# posts (wrapped in ExistingPostsCollector so no re-collection happens).
# Errors degrade to a not-ready preparation hash instead of raising.
def prepare_history_summary(latest_20_posts:)
  posts_source = ExistingPostsCollector.new(posts: latest_20_posts)
  Ai::ProfileCommentPreparationService.new(
    account: @account,
    profile: @profile,
    posts_limit: TARGET_ANALYZED_POSTS,
    comments_limit: COLLECTION_COMMENTS_LIMIT,
    analyze_missing_posts: false,
    collector: posts_source
  ).prepare!(force: true)
rescue StandardError => e
  {
    "ready_for_comment_generation" => false,
    "reason_code" => "profile_preparation_error",
    "reason" => e.message.to_s,
    "error_class" => e.class.name
  }
end
-
-
1
# Verifies that the dominant detected face belongs to the profile owner.
# Phase 1: walk analyzed posts with media — queue face refreshes for stale
# posts (bounded per run) or resolve identities for posts that have faces.
# Phase 2: tally face appearances per person and compare the owner-matched
# count/ratio against the verification thresholds.
# Returns a string-keyed report hash; errors degrade to an error report.
def verify_face_identity(latest_posts:)
  refresh_queue = {
    "queued_count" => 0,
    "pending_count" => 0,
    "deferred_count" => 0,
    "failures" => []
  }
  eligible_posts = Array(latest_posts).select { |post| post_analyzed?(post) && post.media.attached? }

  eligible_posts.each do |post|
    if face_refresh_required?(post: post)
      if face_refresh_in_flight?(post: post)
        refresh_queue["pending_count"] = refresh_queue["pending_count"].to_i + 1
        next
      end

      # Respect the per-run enqueue budget; remaining posts are deferred.
      if refresh_queue["queued_count"].to_i >= FACE_REFRESH_MAX_ENQUEUE_PER_RUN
        refresh_queue["deferred_count"] = refresh_queue["deferred_count"].to_i + 1
        next
      end

      enqueue_state = enqueue_face_refresh_for_post(post: post)
      if enqueue_state[:queued]
        refresh_queue["queued_count"] = refresh_queue["queued_count"].to_i + 1
      else
        refresh_queue["failures"] << {
          "instagram_profile_post_id" => post.id,
          "shortcode" => post.shortcode.to_s.presence,
          "error_class" => enqueue_state[:error_class].to_s.presence || "enqueue_failed",
          "error_message" => enqueue_state[:error_message].to_s.byteslice(0, 220)
        }.compact
      end
      next
    end

    resolve_identity_for_post!(post: post) if post.instagram_post_faces.exists?
  end

  refresh_queue["failures"] = Array(refresh_queue["failures"]).first(20)

  # Appearance count per identified person across the whole profile.
  counts = InstagramPostFace.joins(:instagram_profile_post)
                            .where(instagram_profile_posts: { instagram_profile_id: @profile.id })
                            .where.not(instagram_story_person_id: nil)
                            .group(:instagram_story_person_id)
                            .count
  total_faces = counts.values.sum.to_i

  if total_faces <= 0
    return {
      "confirmed" => false,
      "reason_code" => "insufficient_face_data",
      "reason" => "No detected faces were available for identity verification.",
      "total_faces" => 0,
      "reference_face_count" => 0,
      "dominance_ratio" => 0.0,
      "combined_faces" => [],
      "refresh_queue" => refresh_queue
    }
  end

  profile_username = normalize_username(@profile.username)
  people = @profile.instagram_story_people.where(id: counts.keys).index_by(&:id)
  combined = counts.sort_by { |_id, count| -count.to_i }.map do |person_id, appearances|
    person = people[person_id]
    linked = linked_usernames_for(person)
    label_username = normalize_username(person&.label)
    # A person matches the owner via linked username, label, or primary role.
    owner_match = linked.include?(profile_username) || label_username == profile_username || person&.role.to_s == "primary_user"

    {
      "person_id" => person_id,
      "label" => person&.display_label.to_s.presence || "person_#{person_id}",
      "role" => person&.role.to_s.presence || "unknown",
      "appearances" => appearances.to_i,
      "linked_usernames" => linked,
      "owner_match" => owner_match
    }
  end

  reference_face_count = combined.sum { |row| row["owner_match"] ? row["appearances"].to_i : 0 }
  dominance_ratio = total_faces.positive? ? (reference_face_count.to_f / total_faces.to_f).round(4) : 0.0
  confirmed = reference_face_count >= FACE_VERIFICATION_MIN_APPEARANCES && dominance_ratio >= FACE_VERIFICATION_MIN_RATIO

  reason_code =
    if confirmed
      "identity_confirmed"
    elsif reference_face_count < FACE_VERIFICATION_MIN_APPEARANCES
      "insufficient_reference_face_appearances"
    else
      "identity_match_ratio_too_low"
    end

  reason =
    if confirmed
      "Reference face verification confirms this face belongs to @#{@profile.username}."
    else
      "Reference face verification did not reach the required confidence threshold."
    end

  {
    "confirmed" => confirmed,
    "reason_code" => reason_code,
    "reason" => reason,
    "total_faces" => total_faces,
    "reference_face_count" => reference_face_count,
    "dominance_ratio" => dominance_ratio,
    "combined_faces" => combined.first(12),
    "refresh_queue" => refresh_queue
  }
rescue StandardError => e
  {
    "confirmed" => false,
    "reason_code" => "face_verification_error",
    "reason" => e.message.to_s,
    "error_class" => e.class.name,
    "total_faces" => 0,
    "reference_face_count" => 0,
    "dominance_ratio" => 0.0,
    "combined_faces" => [],
    "refresh_queue" => {
      "queued_count" => 0,
      "pending_count" => 0,
      "deferred_count" => 0,
      "failures" => []
    }
  }
end
-
-
1
# Picks the single most relevant [reason_code, reason] pair for the current
# build state, checked in pipeline order: capture -> downloads -> analyses ->
# face refresh -> analyzed window -> preparation -> face verification.
def resolve_reason(checks:, queue_state:, preparation:, face_verification:, history_ready:)
  return ["history_ready", "History build completed and identity verified."] if history_ready

  unless ActiveModel::Type::Boolean.new.cast(checks.dig("all_posts_captured", "ready"))
    return ["all_posts_not_yet_captured", "All posts have not been captured yet."]
  end

  unless ActiveModel::Type::Boolean.new.cast(checks.dig("latest_50_captured", "ready"))
    code = checks.dig("latest_50_captured", "reason_code").to_s.presence || "latest_50_posts_not_captured"
    if code == "no_recent_posts_available"
      return ["no_recent_posts_available", "No recent posts are available for history verification."]
    end
    return ["latest_50_posts_not_captured", "Latest 50 posts have not been fully captured yet."]
  end

  if queue_state["downloads_queued"].to_i.positive? || queue_state["downloads_pending"].to_i.positive?
    return ["waiting_for_media_downloads", "Waiting for media downloads to complete before verification."]
  end

  if queue_state["analyses_queued"].to_i.positive? || queue_state["analyses_pending"].to_i.positive?
    return ["latest_posts_not_analyzed", "Waiting for latest posts to finish analysis."]
  end

  if queue_state["face_refresh_queued"].to_i.positive? ||
     queue_state["face_refresh_pending"].to_i.positive? ||
     queue_state["face_refresh_deferred"].to_i.positive?
    return ["waiting_for_face_refresh", "Waiting for face refresh tasks to complete before verification."]
  end

  unless ActiveModel::Type::Boolean.new.cast(checks.dig("latest_20_analyzed", "ready"))
    code = checks.dig("latest_20_analyzed", "reason_code").to_s.presence
    if code == "no_recent_posts_available"
      return ["no_recent_posts_available", "No recent posts are available for history verification."]
    end
    return ["latest_posts_not_analyzed", "Most recent 20 posts are not fully analyzed yet."]
  end

  unless ActiveModel::Type::Boolean.new.cast(preparation["ready_for_comment_generation"])
    code = preparation["reason_code"].to_s.presence || "profile_preparation_incomplete"
    reason = preparation["reason"].to_s.presence || "Profile preparation is incomplete."
    return [code, reason]
  end

  unless ActiveModel::Type::Boolean.new.cast(face_verification["confirmed"])
    code = face_verification["reason_code"].to_s.presence || "face_verification_incomplete"
    reason = face_verification["reason"].to_s.presence || "Face verification is incomplete."
    return [code, reason]
  end

  # All individual gates passed yet history_ready is false — generic holdover.
  ["history_build_in_progress", "History build is still in progress."]
end
-
-
1
# Summarizes what conversational actions are currently permitted for the
# profile (initial DM, replies, ongoing interaction) plus supporting data.
# Any error degrades to the not-ready default state.
def build_conversation_state(ready:)
  strategy = @profile.instagram_profile_message_strategies.recent_first.first
  openers = normalize_strings(strategy&.opener_templates).first(8)

  incoming_rows = @profile.instagram_messages
                          .where(direction: "incoming")
                          .recent_first
                          .limit(4)
                          .pluck(:body, :created_at)
                          .map do |body, created_at|
    {
      "body" => body.to_s.byteslice(0, 220),
      "created_at" => created_at&.iso8601
    }
  end

  has_incoming = incoming_rows.any?
  outgoing_count = @profile.instagram_messages.where(direction: "outgoing").count
  dm_allowed = @profile.dm_allowed?
  ready_bool = ActiveModel::Type::Boolean.new.cast(ready)

  {
    # An initial message only makes sense before any exchange has happened.
    "can_generate_initial_message" => ready_bool && dm_allowed && !has_incoming && outgoing_count.zero?,
    "can_respond_to_existing_messages" => ready_bool && dm_allowed && has_incoming,
    "continue_natural_interaction" => ready_bool && dm_allowed,
    "dm_allowed" => dm_allowed,
    "has_incoming_messages" => has_incoming,
    "outgoing_message_count" => outgoing_count,
    "suggested_openers" => openers,
    "recent_incoming_messages" => incoming_rows
  }
rescue StandardError
  default_conversation_state(ready: false)
end
-
-
1
# Persists the consolidated history-build state into the profile's behavior
# record metadata and returns the caller-facing result hash. Persistence
# failures are swallowed and a minimal result is returned instead.
def persist_and_result!(status:, ready:, reason_code:, reason:, checks:, queue_state:, preparation:, face_verification:, conversation:)
  ready_bool = ActiveModel::Type::Boolean.new.cast(ready)
  state = {
    "status" => status.to_s,
    "ready" => ready_bool,
    "reason_code" => reason_code.to_s.presence || (ready_bool ? "history_ready" : "history_build_in_progress"),
    "reason" => reason.to_s.presence || (ready_bool ? "History Ready" : "History build in progress."),
    "updated_at" => Time.current.iso8601(3),
    "checks" => checks,
    "queue" => queue_state,
    "history_analysis" => {
      "ready_for_comment_generation" => ActiveModel::Type::Boolean.new.cast(preparation["ready_for_comment_generation"]),
      "reason_code" => preparation["reason_code"].to_s.presence,
      "reason" => preparation["reason"].to_s.presence
    }.compact,
    "face_verification" => face_verification,
    "conversation" => conversation
  }

  behavior = InstagramProfileBehaviorProfile.find_or_initialize_by(instagram_profile: @profile)
  metadata = behavior.metadata.is_a?(Hash) ? behavior.metadata.deep_dup : {}
  metadata["history_build"] = state
  metadata["history_ready"] = ready_bool
  metadata["history_ready_at"] = Time.current.iso8601(3) if ready_bool
  behavior.metadata = metadata
  # Keep required columns in a persistable shape for a fresh record.
  behavior.activity_score = behavior.activity_score.to_f
  behavior.behavioral_summary = {} unless behavior.behavioral_summary.is_a?(Hash)
  behavior.save!

  {
    status: status.to_s,
    ready: ready_bool,
    reason_code: state["reason_code"],
    reason: state["reason"],
    retryable_profile_incomplete: PROFILE_INCOMPLETE_REASON_CODES.include?(state["reason_code"].to_s),
    history_state: state
  }
rescue StandardError
  # Persistence failed — still hand back a coherent (unsaved) result.
  {
    status: status.to_s,
    ready: ready_bool,
    reason_code: reason_code.to_s.presence || "history_state_persist_failed",
    reason: reason.to_s.presence || "Unable to persist history build state.",
    retryable_profile_incomplete: PROFILE_INCOMPLETE_REASON_CODES.include?(reason_code.to_s),
    history_state: {
      "status" => status.to_s,
      "ready" => ready_bool,
      "reason_code" => reason_code.to_s,
      "reason" => reason.to_s
    }
  }
end
-
-
1
# Extracts the refresh-queue counters from a face-verification report into
# the symbol-keyed shape expected by build_queue_state. Always safe.
def face_refresh_queue_state(face_verification:)
  raw = face_verification.is_a?(Hash) ? face_verification["refresh_queue"] : nil
  queue = raw.is_a?(Hash) ? raw : {}
  {
    queued_count: queue["queued_count"].to_i,
    pending_count: queue["pending_count"].to_i,
    deferred_count: queue["deferred_count"].to_i,
    failures: Array(queue["failures"]).first(20)
  }
rescue StandardError
  {
    queued_count: 0,
    pending_count: 0,
    deferred_count: 0,
    failures: []
  }
end

# A post needs a face refresh when its face-recognition data is stale (older
# than FACE_RECENCY_REFRESH_DAYS or missing) or it has no detected faces.
# Errs on the side of refreshing when metadata cannot be read.
def face_refresh_required?(post:)
  metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
  face_recognition = metadata["face_recognition"].is_a?(Hash) ? metadata["face_recognition"] : {}
  refreshed_at = parse_time(face_recognition["updated_at"])
  stale = refreshed_at.nil? || refreshed_at < FACE_RECENCY_REFRESH_DAYS.days.ago
  stale || post.instagram_post_faces.none?
rescue StandardError
  true
end

# True when a queued/running refresh was started within the pending window;
# older markers are treated as abandoned.
def face_refresh_in_flight?(post:)
  state = history_build_face_refresh_state(post: post)
  status = state["status"].to_s
  return false unless status.in?(%w[queued running])

  reference_time = parse_time(state["started_at"]) || parse_time(state["queued_at"])
  reference_time.present? && reference_time >= FACE_REFRESH_PENDING_WINDOW_HOURS.hours.ago
rescue StandardError
  false
end

# Enqueues a face-identity refresh job for the post and stamps its metadata.
# Returns {queued: true, ...} on success, {queued: false, error_*} otherwise.
def enqueue_face_refresh_for_post(post:)
  return { queued: false, error_class: "AlreadyQueued", error_message: "Face refresh already in flight." } if face_refresh_in_flight?(post: post)

  job = RefreshProfilePostFaceIdentityJob.perform_later(
    instagram_account_id: @account.id,
    instagram_profile_id: @profile.id,
    instagram_profile_post_id: post.id,
    trigger_source: "profile_history_build"
  )
  mark_history_build_metadata!(
    post: post,
    attributes: {
      "face_refresh" => {
        "status" => "queued",
        "job_id" => job.job_id,
        "queue_name" => job.queue_name,
        "queued_at" => Time.current.iso8601(3),
        "requested_by" => self.class.name
      }
    }
  )

  { queued: true, job_id: job.job_id, queue_name: job.queue_name }
rescue StandardError => e
  {
    queued: false,
    error_class: e.class.name,
    error_message: e.message.to_s
  }
end

# Reads metadata["history_build"]["face_refresh"] defensively; {} when absent.
def history_build_face_refresh_state(post:)
  metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
  history = metadata["history_build"].is_a?(Hash) ? metadata["history_build"] : {}
  refresh = history["face_refresh"].is_a?(Hash) ? history["face_refresh"] : {}
  refresh
rescue StandardError
  {}
end

# Delegates identity resolution for one post to the injected service.
# Best effort: any failure is ignored.
def resolve_identity_for_post!(post:)
  @face_identity_resolution_service.resolve_for_post!(
    post: post,
    extracted_usernames: extracted_usernames_for_post(post),
    content_summary: post.analysis.is_a?(Hash) ? post.analysis : {}
  )
rescue StandardError
  nil
end

# Collects candidate usernames for a post from its analysis output plus
# @-handles scanned out of the caption and OCR text (capped at 24).
def extracted_usernames_for_post(post)
  analysis = post.analysis.is_a?(Hash) ? post.analysis : {}
  candidates = []
  candidates.concat(Array(analysis["mentions"]))
  candidates.concat(Array(analysis["profile_handles"]))
  candidates.concat(post.caption.to_s.scan(/@[a-zA-Z0-9._]{2,30}/))
  candidates.concat(analysis["ocr_text"].to_s.scan(/@[a-zA-Z0-9._]{2,30}/))
  candidates.map { |value| normalize_username(value) }.reject(&:blank?).uniq.first(24)
end

# Normalized usernames linked to a story person via its metadata.
def linked_usernames_for(person)
  meta = person&.metadata
  linked = meta.is_a?(Hash) ? meta["linked_usernames"] : nil
  normalize_strings(linked).map { |value| normalize_username(value) }.reject(&:blank?).uniq
end

# Coerces any value to an array of stripped, non-blank strings.
def normalize_strings(value)
  Array(value).map { |row| row.to_s.strip }.reject(&:blank?)
end

# Merges the given attributes into the post's metadata["history_build"]
# under a row lock, stamping updated_at. Best effort: failures are ignored.
def mark_history_build_metadata!(post:, attributes:)
  post.with_lock do
    metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
    state = metadata["history_build"].is_a?(Hash) ? metadata["history_build"].deep_dup : {}
    state.merge!(attributes.to_h)
    state["updated_at"] = Time.current.iso8601(3)
    metadata["history_build"] = state
    post.update!(metadata: metadata)
  end
rescue StandardError
  nil
end
-
-
1
# A post counts as analyzed only when both the status flag and timestamp agree.
def post_analyzed?(post)
  post.ai_status.to_s == "analyzed" && post.analyzed_at.present?
end

# True when analysis is underway: either the post's ai_status says so, or
# the ai_pipeline metadata reports a running stage.
def post_analysis_in_flight?(post)
  return true if post.ai_status.to_s.in?(%w[pending running])

  metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
  pipeline = metadata["ai_pipeline"].is_a?(Hash) ? metadata["ai_pipeline"] : {}
  pipeline["status"].to_s == "running"
rescue StandardError
  false
end

# A download is in flight when marked queued within the last 8 hours;
# anything older is treated as abandoned so it can be re-queued.
def media_download_in_flight?(post)
  metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
  status = metadata["download_status"].to_s
  queued_at = parse_time(metadata["download_queued_at"])
  status == "queued" && queued_at.present? && queued_at > 8.hours.ago
rescue StandardError
  false
end

# A post is downloadable when it is not source-deleted and any media URL
# (primary, or video/image fallbacks in metadata) is known.
def downloadable_post?(post)
  return false if deleted_post?(post)
  return true if post.source_media_url.to_s.strip.present?

  metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
  metadata["media_url_video"].to_s.strip.present? || metadata["media_url_image"].to_s.strip.present?
end

# True when the post's metadata flags it as removed from the source feed.
def deleted_post?(post)
  metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
  ActiveModel::Type::Boolean.new.cast(metadata["deleted_from_source"])
end

# All of the profile's posts that are not flagged deleted_from_source.
def active_posts_scope
  @profile.instagram_profile_posts.where("COALESCE(metadata ->> 'deleted_from_source', 'false') <> 'true'")
end

# Lowercases, trims, and strips a leading "@"; nil when nothing remains.
def normalize_username(value)
  handle = value.to_s.strip.downcase
  handle = handle.delete_prefix("@")
  handle.presence
end

# Parses a timestamp string in the app time zone; nil on blank/bad input.
def parse_time(value)
  return nil if value.to_s.blank?

  Time.zone.parse(value.to_s)
rescue StandardError
  nil
end
-
-
1
# Initial (all not-started) capture/analysis checks.
def default_checks
  %w[all_posts_captured latest_50_captured latest_20_analyzed].to_h do |check_name|
    [check_name, { "ready" => false, "reason_code" => "not_started" }]
  end
end

# Zeroed queue-state counters with empty failure lists.
def default_queue_state
  {
    "downloads_queued" => 0,
    "downloads_pending" => 0,
    "downloads_skipped" => 0,
    "analysis_failures" => [],
    "analyses_queued" => 0,
    "analyses_pending" => 0,
    "analyses_skipped" => 0,
    "analysis_queue_failures" => [],
    "face_refresh_queued" => 0,
    "face_refresh_pending" => 0,
    "face_refresh_deferred" => 0,
    "face_refresh_failures" => []
  }
end

# Not-started face-verification report with an empty refresh queue.
def default_face_verification
  {
    "confirmed" => false,
    "reason_code" => "not_started",
    "reason" => "Face verification has not started.",
    "total_faces" => 0,
    "reference_face_count" => 0,
    "dominance_ratio" => 0.0,
    "combined_faces" => [],
    "refresh_queue" => {
      "queued_count" => 0,
      "pending_count" => 0,
      "deferred_count" => 0,
      "failures" => []
    }
  }
end

# Conversation state when nothing is permitted yet; only the
# "continue_natural_interaction" flag mirrors the given readiness.
def default_conversation_state(ready:)
  {
    "can_generate_initial_message" => false,
    "can_respond_to_existing_messages" => false,
    "continue_natural_interaction" => ActiveModel::Type::Boolean.new.cast(ready),
    "dm_allowed" => false,
    "has_incoming_messages" => false,
    "outgoing_message_count" => 0,
    "suggested_openers" => [],
    "recent_incoming_messages" => []
  }
end
-
-
1
# Collector stand-in that returns a pre-loaded post set instead of fetching
# from Instagram; lets downstream services reuse already-queried posts while
# honoring the collector interface (collect_and_persist!).
class ExistingPostsCollector
  def initialize(posts:)
    @posts = posts
  end

  # Ignores all collector options and returns the wrapped posts.
  def collect_and_persist!(**_kwargs)
    { posts: Array(@posts) }
  end
end
-
end
-
end
-
module Ai
  # Appends one-line, human-readable summaries of profile events to
  # rolling "history chunk" records on the profile, each capped at
  # CHUNK_WORD_LIMIT words. The resulting chunks form a narrative
  # timeline of a profile's activity that can be fed to an LLM later.
  class ProfileHistoryNarrativeBuilder
    # Soft word cap per chunk; a new chunk is started once an append
    # would push the current chunk past this limit.
    CHUNK_WORD_LIMIT = 500

    # Only these event kinds are narrated; everything else is ignored.
    INTERESTING_EVENT_KINDS = %w[
      story_uploaded
      story_viewed
      story_downloaded
      story_analyzed
      story_reply_sent
      story_reply_skipped
      story_ad_skipped
      story_video_skipped
      story_sync_failed
      feed_post_image_downloaded
      feed_post_comment_posted
      post_comment_sent
      profile_details_refreshed
      avatar_downloaded
    ].freeze

    # Convenience entry point: summarize +event+ and append it.
    def self.append_event!(event)
      new(event: event).append_event!
    end

    # Convenience entry point for story-intelligence summaries.
    def self.append_story_intelligence!(event, intelligence:)
      new(event: event).append_story_intelligence!(intelligence: intelligence)
    end

    def initialize(event:)
      @event = event
      @profile = event.instagram_profile
      @account = @profile&.instagram_account
    end

    # Summarizes @event and appends the line to the profile's current
    # history chunk. Best-effort: any error is logged and swallowed so
    # the event pipeline is never interrupted.
    def append_event!
      return unless @profile && @account
      return unless INTERESTING_EVENT_KINDS.include?(@event.kind.to_s)

      entry = summarize_event(@event)
      return if entry.blank?

      ts = @event.occurred_at || @event.detected_at || Time.current
      # Row lock serializes concurrent appends against the same profile
      # so two writers cannot clobber each other's chunk content.
      with_profile_lock do
        chunk = current_or_new_chunk!(entry: entry, timestamp: ts)
        content = chunk.content.to_s
        content = [content, entry].reject(&:blank?).join("\n")
        chunk.update!(
          content: content,
          word_count: words_in(content),
          entry_count: chunk.entry_count.to_i + 1,
          starts_at: chunk.starts_at || ts,
          ends_at: ts
        )
      end
    rescue StandardError => e
      Rails.logger.warn("[Ai::ProfileHistoryNarrativeBuilder] failed for profile_id=#{@profile&.id}: #{e.class}: #{e.message}")
      nil
    end

    # Same append flow as append_event!, but the line comes from a
    # story-intelligence hash rather than the event's own metadata.
    def append_story_intelligence!(intelligence:)
      return unless @profile && @account

      entry = summarize_story_intelligence(intelligence)
      return if entry.blank?

      ts = @event.occurred_at || @event.detected_at || Time.current
      with_profile_lock do
        chunk = current_or_new_chunk!(entry: entry, timestamp: ts)
        content = chunk.content.to_s
        content = [content, entry].reject(&:blank?).join("\n")
        chunk.update!(
          content: content,
          word_count: words_in(content),
          entry_count: chunk.entry_count.to_i + 1,
          starts_at: chunk.starts_at || ts,
          ends_at: ts
        )
      end
    rescue StandardError => e
      Rails.logger.warn("[Ai::ProfileHistoryNarrativeBuilder] intelligence append failed for profile_id=#{@profile&.id}: #{e.class}: #{e.message}")
      nil
    end

    private

    # Builds one "[timestamp] Title - k=v | k=v" line from the event's
    # metadata, capped at 900 bytes.
    def summarize_event(event)
      metadata = event.metadata.is_a?(Hash) ? event.metadata : {}
      timestamp = (event.occurred_at || event.detected_at || Time.current).in_time_zone.strftime("%Y-%m-%d %H:%M")
      base = "[#{timestamp}] #{human_event_title(event.kind)}"

      details = []
      details << "story_id=#{metadata['story_id']}" if metadata['story_id'].to_s.present?
      details << "media=#{metadata['media_type']}" if metadata['media_type'].to_s.present?
      details << "location=#{metadata['location']}" if metadata['location'].to_s.present?
      details << "event=#{metadata['event']}" if metadata['event'].to_s.present?
      details << "description=#{normalize_text(metadata['ai_image_description'])}" if metadata['ai_image_description'].to_s.present?
      details << "caption=#{normalize_text(metadata['caption'])}" if metadata['caption'].to_s.present?
      details << "comment=#{normalize_text(metadata['ai_reply_text'] || metadata['comment_text'])}" if (metadata['ai_reply_text'].to_s.present? || metadata['comment_text'].to_s.present?)
      details << "reason=#{normalize_text(metadata['reason'] || metadata['skip_reason'])}" if (metadata['reason'].to_s.present? || metadata['skip_reason'].to_s.present?)
      details << "url=#{metadata['story_url']}" if metadata['story_url'].to_s.present?
      details << "permalink=#{metadata['permalink']}" if metadata['permalink'].to_s.present?
      details << "topics=#{Array(metadata['topics']).first(8).join(',')}" if Array(metadata['topics']).any?
      details << "objects=#{Array(metadata['content_signals']).first(8).join(',')}" if Array(metadata['content_signals']).any?
      details << "hashtags=#{Array(metadata['hashtags']).first(8).join(',')}" if Array(metadata['hashtags']).any?
      details << "mentions=#{Array(metadata['mentions']).first(6).join(',')}" if Array(metadata['mentions']).any?
      details << "ocr=#{normalize_text(metadata['ocr_text'])}" if metadata['ocr_text'].to_s.present?
      details << "transcript=#{normalize_text(metadata['transcript'])}" if metadata['transcript'].to_s.present?

      line = [base, details.join(" | ")].reject(&:blank?).join(" - ")
      line.byteslice(0, 900)
    end

    # Builds the narrative line for a story-intelligence extraction.
    # The intelligence hash may use symbol or string keys; both are
    # probed. Returns nil when no detail at all could be extracted.
    def summarize_story_intelligence(intelligence)
      data = intelligence.is_a?(Hash) ? intelligence : {}
      ts = (@event.occurred_at || @event.detected_at || Time.current).in_time_zone.strftime("%Y-%m-%d %H:%M")
      generation_policy = data[:generation_policy].is_a?(Hash) ? data[:generation_policy] : (data["generation_policy"].is_a?(Hash) ? data["generation_policy"] : {})

      details = []
      details << "topic=#{Array(data[:topics]).first(8).join(',')}" if Array(data[:topics]).any?
      details << "objects=#{Array(data[:objects]).first(8).join(',')}" if Array(data[:objects]).any?
      details << "hashtags=#{Array(data[:hashtags]).first(8).join(',')}" if Array(data[:hashtags]).any?
      details << "mentions=#{Array(data[:mentions]).first(6).join(',')}" if Array(data[:mentions]).any?
      details << "handles=#{Array(data[:profile_handles] || data['profile_handles']).first(6).join(',')}" if Array(data[:profile_handles] || data['profile_handles']).any?
      details << "detected_users=#{Array(data[:detected_usernames] || data['detected_usernames']).first(6).join(',')}" if Array(data[:detected_usernames] || data['detected_usernames']).any?
      details << "source_refs=#{Array(data[:source_profile_references] || data['source_profile_references']).first(4).join(',')}" if Array(data[:source_profile_references] || data['source_profile_references']).any?
      details << "share=#{data[:share_status] || data['share_status']}" if (data[:share_status] || data['share_status']).to_s.present?
      details << "scenes=#{Array(data[:scenes]).first(6).map { |row| row.is_a?(Hash) ? row[:type] || row['type'] : row }.join(',')}" if Array(data[:scenes]).any?
      details << "ocr=#{normalize_text(data[:ocr_text])}" if data[:ocr_text].to_s.present?
      details << "transcript=#{normalize_text(data[:transcript])}" if data[:transcript].to_s.present?
      details << "description=#{normalize_text(data[:description])}" if data[:description].to_s.present?
      details << "faces=#{data[:face_count].to_i}" if data[:face_count].to_i.positive?
      details << "ownership=#{data[:ownership_classification] || data['ownership_classification']}" if (data[:ownership_classification] || data['ownership_classification']).to_s.present?
      details << "ownership_conf=#{data[:ownership_confidence] || data['ownership_confidence']}" if (data[:ownership_confidence] || data['ownership_confidence']).to_s.present?
      details << "ownership_reason=#{Array(data[:ownership_reason_codes] || data['ownership_reason_codes']).first(6).join(',')}" if Array(data[:ownership_reason_codes] || data['ownership_reason_codes']).any?
      details << "policy=#{generation_policy[:allow_comment] ? 'allow' : 'skip'}:#{generation_policy[:reason_code]}" if generation_policy.key?(:allow_comment)

      return nil if details.empty?

      line = "[#{ts}] Story Intelligence Extracted - #{details.join(' | ')}"
      line.byteslice(0, 900)
    end

    # "story_reply_sent" -> "Story Reply Sent"
    def human_event_title(kind)
      kind.to_s.tr("_", " ").split.map(&:capitalize).join(" ")
    end

    # Collapses whitespace and caps free text at 220 bytes.
    def normalize_text(value)
      value.to_s.gsub(/\s+/, " ").strip.byteslice(0, 220)
    end

    # Returns the newest chunk if the entry still fits under the word
    # limit; otherwise creates (and returns) the next-sequence chunk.
    def current_or_new_chunk!(entry:, timestamp:)
      needed = words_in(entry)
      current = @profile.instagram_profile_history_chunks.recent_first.first
      return create_chunk!(sequence: 1, timestamp: timestamp) unless current

      projected = current.word_count.to_i + needed
      return current if projected <= CHUNK_WORD_LIMIT

      create_chunk!(sequence: current.sequence.to_i + 1, timestamp: timestamp)
    end

    # Creates an empty chunk record for the given sequence number.
    def create_chunk!(sequence:, timestamp:)
      @profile.instagram_profile_history_chunks.create!(
        instagram_account: @account,
        sequence: sequence,
        content: "",
        word_count: 0,
        entry_count: 0,
        starts_at: timestamp,
        ends_at: timestamp,
        metadata: { source: "event_narrative_builder", chunk_word_limit: CHUNK_WORD_LIMIT }
      )
    end

    # Word count used for the chunk budget.
    def words_in(text)
      text.to_s.scan(/\b[^\s]+\b/).length
    end

    # Serializes chunk writes per profile via a DB row lock.
    def with_profile_lock(&block)
      @profile.with_lock(&block)
    end
  end
end
-
module Ai
  # Central catalogue of AI providers. Maps registry keys to provider
  # class names and keeps a persisted AiProviderSetting row per key.
  class ProviderRegistry
    PROVIDERS = {
      "local" => "Ai::Providers::LocalProvider"
    }.freeze

    class << self
      # All known provider keys, e.g. ["local"].
      def provider_keys
        PROVIDERS.keys
      end

      # Guarantees a settings row exists for every registered provider,
      # seeding defaults on first creation.
      def ensure_settings!
        provider_keys.each do |key|
          AiProviderSetting.find_or_create_by!(provider: key) do |record|
            record.enabled = default_enabled?(key)
            record.priority = default_priority(key)
          end
        end
      end

      # Enabled provider settings, highest priority (lowest number) first.
      def enabled_settings
        ensure_settings!
        AiProviderSetting.where(provider: provider_keys, enabled: true).order(priority: :asc, provider: :asc)
      end

      # Every provider's settings, enabled rows first.
      def all_settings
        ensure_settings!
        AiProviderSetting.where(provider: provider_keys).enabled_first
      end

      # Instantiates the provider class registered under +provider_key+,
      # loading its persisted setting unless one is supplied.
      def build_provider(provider_key, setting: nil)
        klass_name = PROVIDERS[provider_key.to_s]
        raise "Unsupported AI provider: #{provider_key}" if klass_name.blank?

        klass_name.constantize.new(setting: setting || AiProviderSetting.find_by(provider: provider_key))
      end

      private

      # The local stack is available out of the box; anything else
      # starts disabled until configured.
      def default_enabled?(provider)
        provider == "local"
      end

      # Lower number = higher priority; the local provider runs first.
      def default_priority(provider)
        provider == "local" ? 1 : 100
      end
    end
  end
end
-
module Ai
  module Providers
    # Abstract superclass for AI providers. Holds the persisted setting
    # record and defines the capability/availability contract subclasses
    # override (key, supports_*, analyze_*!, test_key!).
    class BaseProvider
      attr_reader :setting

      def initialize(setting: nil)
        @setting = setting
      end

      # Registry key for this provider. Subclasses must override.
      def key
        raise NotImplementedError
      end

      # Display label, preferring the persisted setting's name.
      def display_name
        setting&.display_name || key.to_s.humanize
      end

      # Capability flags — subclasses opt in by overriding with true.
      def supports_profile?
        false
      end

      def supports_post_image?
        false
      end

      def supports_post_video?
        false
      end

      # Usable only when explicitly enabled and, where required, keyed.
      def available?
        enabled = setting&.enabled
        return false unless enabled == true

        !requires_api_key? || setting&.api_key_present?
      end

      # Most providers need an API key; key-less stacks override.
      def requires_api_key?
        true
      end

      def preferred_model
        effective_model
      end

      # Connectivity/credential check. Subclasses must override.
      def test_key!
        raise NotImplementedError
      end

      def analyze_profile!(_profile_payload:, _media: nil)
        raise NotImplementedError
      end

      def analyze_post!(_post_payload:, _media: nil, _provider_options: {})
        raise NotImplementedError
      end

      protected

      # Returns the configured API key or raises with the provider name.
      def ensure_api_key!
        api_key = setting&.effective_api_key
        return api_key if api_key.to_s.present?

        raise "Missing API key for #{display_name}"
      end

      # Configured model name, or "" when no setting is present.
      def effective_model
        setting&.effective_model.to_s
      end
    end
  end
end
-
module Ai
-
module Providers
-
class LocalProvider < BaseProvider
-
# The frame-change detector is injected so tests can stub the
# static-vs-dynamic video classification.
def initialize(setting: nil, video_frame_change_detector_service: VideoFrameChangeDetectorService.new)
  super(setting: setting)
  @video_frame_change_detector_service = video_frame_change_detector_service
end
-
-
# Registry key (see Ai::ProviderRegistry::PROVIDERS).
def key
  "local"
end
-
-
# The local stack can analyze profile payloads.
def supports_profile?
  true
end
-
-
# The local stack can analyze post images.
def supports_post_image?
  true
end
-
-
# The local stack can analyze post videos.
def supports_post_video?
  true
end
-
-
# Local services need no API key, unlike hosted providers.
def requires_api_key?
  false
end
-
-
# Health check for the local AI stack. Probes both the local
# microservice and Ollama; reports ok only when both respond.
# Never raises — connection errors are folded into an { ok: false } hash.
def test_key!
  # Test both microservice and Ollama
  microservice_result = client.test_connection!
  ollama_result = ollama_client.test_connection!

  if microservice_result[:ok] && ollama_result[:ok]
    {
      ok: true,
      message: "Local AI services are healthy",
      microservice: microservice_result[:services],
      ollama: ollama_result[:models]
    }
  else
    # Report every failing dependency, not just the first one.
    errors = []
    errors << "Microservice: #{microservice_result[:message]}" unless microservice_result[:ok]
    errors << "Ollama: #{ollama_result[:message]}" unless ollama_result[:ok]

    { ok: false, message: errors.join(" | ") }
  end
rescue StandardError => e
  { ok: false, message: e.message.to_s }
end
-
-
# Rule-based profile analysis built from vision labels on the supplied
# media plus heuristics over the bio and recent outgoing messages.
# Returns a { model:, prompt:, response_text:, response_raw:, analysis: }
# hash shaped like the hosted-provider responses.
def analyze_profile!(profile_payload:, media: nil)
  image_labels = []

  # Collect vision labels per media item; failures for one item only
  # append an error marker label and never abort the whole analysis.
  Array(media).each do |item|
    next unless item.is_a?(Hash)

    if item[:url].to_s.start_with?("http://", "https://")
      vision = client.analyze_image_uri!(item[:url], features: image_features)
      image_labels.concat(extract_image_labels(vision))
    elsif item[:bytes].present?
      vision = client.analyze_image_bytes!(item[:bytes], features: image_features)
      image_labels.concat(extract_image_labels(vision))
    end
  rescue StandardError => e
    image_labels << "image_analysis_error:#{e.class.name}"
  end

  bio = profile_payload[:bio].to_s
  recent_messages = Array(profile_payload[:recent_outgoing_messages]).map { |m| m[:body].to_s }.join(" ")
  combined = [ bio, recent_messages ].join(" ").downcase
  # NOTE(review): infer_demographic_estimates is defined elsewhere in
  # this class — presumably returns the demo hash consumed below.
  demo = infer_demographic_estimates(text: combined, bio: bio, labels: image_labels)

  languages = []
  # Crude heuristic: any run of 3+ ASCII letters counts as English.
  languages << { language: "english", confidence: 0.7, evidence: "ASCII text in bio/messages" } if combined.match?(/[a-z]{3,}/)

  analysis = {
    "summary" => "Rule-based local AI analysis from profile text and vision labels.",
    "languages" => languages,
    "likes" => image_labels.first(10),
    "dislikes" => [],
    "intent_labels" => [ "unknown" ],
    "conversation_hooks" => image_labels.first(3).map { |label| { "hook" => "Ask about #{label}", "evidence" => "vision_label:#{label}" } },
    "personalization_tokens" => image_labels.first(5),
    "no_go_zones" => [],
    "writing_style" => {
      "tone" => infer_tone(combined),
      "formality" => infer_formality(combined),
      # Any non-ASCII character is treated as emoji usage.
      "emoji_usage" => combined.match?(/[^\x00-\x7F]/) ? "present" : "low",
      "slang_level" => infer_slang(combined),
      "evidence" => "Derived from bio + latest outgoing messages."
    },
    "response_style_prediction" => "unknown",
    # Scores skew higher when any visual signal was found.
    "engagement_probability" => image_labels.any? ? 0.55 : 0.35,
    "recommended_next_action" => image_labels.any? ? "comment" : "review",
    "demographic_estimates" => {
      "age" => demo[:age],
      "age_confidence" => demo[:age_confidence],
      "gender" => demo[:gender],
      "gender_confidence" => demo[:gender_confidence],
      "location" => demo[:location],
      "location_confidence" => demo[:location_confidence],
      "evidence" => demo[:evidence]
    },
    "self_declared" => {
      "age" => extract_age(bio),
      "gender" => nil,
      "location" => nil,
      "pronouns" => extract_pronouns(bio),
      "other" => nil
    },
    "suggested_dm_openers" => [
      "Your recent posts are a vibe, what are you into most these days? ✨",
      "Okay your content style is low-key fire, what inspired it? 🔥",
      "Your feed feels super intentional, got any creator recs?",
      "Not gonna lie, your profile energy is elite. What do you like posting most?",
      "Your page is giving main-character energy, what are you building next? 👀"
    ],
    "suggested_comment_templates" => [
      "This is such a vibe 🔥",
      "Okay this ate, love this one 👏",
      "Clean shot, super satisfying fr",
      "This goes hard, great share ✨",
      "Big fan of this style, keep it coming 🙌"
    ],
    "confidence_notes" => "Built with local AI models to minimize cost; output is conservative and evidence-driven.",
    "why_not_confident" => "Limited structured text/bio and limited image context."
  }

  {
    model: "local-ai-vision+rules",
    prompt: {
      provider: key,
      image_count: Array(media).length,
      rule_based: true
    },
    response_text: "local_ai_rule_based_analysis",
    response_raw: { image_labels: image_labels },
    analysis: analysis
  }
end
-
-
# Rule-based post analysis. Routes the media to image or video
# analysis (treating "static" videos as a single representative
# frame), derives visual labels, applies tag-based relevance rules,
# and optionally generates engagement comments via Ollama.
# Returns a { model:, prompt:, response_text:, response_raw:, analysis: }
# hash shaped like the hosted-provider responses.
def analyze_post!(post_payload:, media: nil, provider_options: {})
  options = normalize_provider_options(provider_options)
  media_hash = media.is_a?(Hash) ? media : {}
  labels = []
  raw = {}
  image_description = nil

  case media_hash[:type].to_s
  when "image"
    # safe_media_analysis never raises: it returns [payload, warning].
    vision, vision_warning = safe_media_analysis(stage: "image_analysis", media_type: "image") do
      analyze_image_media(media_hash, provider_options: options)
    end
    raw[:vision] = vision
    labels = extract_image_labels(vision)
    if vision_warning
      labels << warning_label_for_error(vision_warning[:error_class], prefix: "image_analysis_error")
      raw[:vision_warning] = vision_warning
    end
    labels = labels.uniq
    image_description =
      if labels.any?
        build_image_description_from_vision(vision, labels: labels)
      else
        "Image analysis unavailable."
      end
  when "video"
    # Decide whether the video is effectively a single static frame.
    mode = classify_video_processing(media_hash)
    raw[:video_processing] = (mode[:metadata].is_a?(Hash) ? mode[:metadata] : {}).merge(
      processing_mode: mode[:processing_mode].to_s,
      static: ActiveModel::Type::Boolean.new.cast(mode[:static]),
      duration_seconds: mode[:duration_seconds]
    ).compact
    if mode[:processing_mode].to_s == "static_image" && mode[:frame_bytes].present?
      # Static video: analyze the representative frame as an image.
      static_media = {
        type: "image",
        content_type: mode[:frame_content_type].to_s.presence || "image/jpeg",
        bytes: mode[:frame_bytes]
      }
      vision, vision_warning = safe_media_analysis(stage: "image_analysis", media_type: "image") do
        analyze_image_media(static_media, provider_options: options)
      end
      raw[:vision] = vision
      labels = extract_image_labels(vision)
      if vision_warning
        labels << warning_label_for_error(vision_warning[:error_class], prefix: "image_analysis_error")
        raw[:vision_warning] = vision_warning
      end
      labels = labels.uniq
      image_description =
        if labels.any?
          "Static video detected; analyzed representative frame. #{build_image_description_from_vision(vision, labels: labels)}".strip
        else
          "Static video detected, but frame analysis was unavailable."
        end
    else
      # Dynamic video: run full video analysis.
      video, video_warning = safe_media_analysis(stage: "video_analysis", media_type: "video") do
        analyze_video_media(media_hash, provider_options: options)
      end
      raw[:video] = video
      labels = extract_video_labels(video)
      if video_warning
        labels << warning_label_for_error(video_warning[:error_class], prefix: "video_analysis_error")
        raw[:video_warning] = video_warning
      end
      labels = labels.uniq
      image_description =
        if labels.any?
          build_image_description_from_video(video, labels: labels)
        else
          "Video analysis unavailable."
        end
    end
  else
    labels = []
    image_description = "No image or video content available for visual description."
  end

  # Drop error-marker labels; keep only real visual signals.
  visual_labels = meaningful_visual_labels(labels)
  detected_face_count = extract_face_count_from_raw(raw)
  if detected_face_count.positive? && !visual_labels.include?("person")
    visual_labels << "person"
  end
  visual_labels = visual_labels.uniq
  image_description = unavailable_visual_description(raw: raw, media_type: media_hash[:type]) if visual_labels.empty?

  # Tag-based relevance rules: ignore wins over prefer.
  author_tags = Array(post_payload.dig(:author_profile, :tags)).map(&:to_s)
  ignore_tags = Array(post_payload.dig(:rules, :ignore_if_tagged)).map(&:to_s)
  prefer_tags = Array(post_payload.dig(:rules, :prefer_interact_if_tagged)).map(&:to_s)

  author_type = infer_author_type(author_tags)
  ignored = !(author_tags & ignore_tags).empty?
  preferred = !(author_tags & prefer_tags).empty?

  relevant = if ignored
    false
  elsif preferred
    true
  else
    visual_labels.any?
  end

  actions = if ignored
    [ "ignore" ]
  elsif preferred
    [ "review", "like_suggestion", "comment_suggestion" ]
  else
    [ "review" ]
  end

  # Comments are only generated when allowed by options AND at least
  # one verified visual signal exists.
  comment_generation =
    if !options[:include_comment_generation]
      comment_generation_disabled_result
    elsif visual_labels.any?
      generate_engagement_comments_with_fallback(
        post_payload: post_payload,
        image_description: image_description,
        labels: visual_labels,
        author_type: author_type
      )
    else
      skipped_comment_generation_for_missing_visuals(raw: raw, media_type: media_hash[:type])
    end

  {
    model: [ "local-ai-vision-video+rules", comment_generation[:model] ].compact.join("+"),
    prompt: {
      provider: key,
      media_type: media_hash[:type].to_s,
      rule_based: true,
      provider_options: options
    },
    response_text: "local_ai_rule_based_post_analysis",
    response_raw: raw.merge(
      comment_generation: {
        status: comment_generation[:status],
        source: comment_generation[:source],
        fallback_used: comment_generation[:fallback_used],
        model: comment_generation[:model],
        error_message: comment_generation[:error_message],
        raw: comment_generation[:raw]
      }
    ),
    analysis: {
      "image_description" => image_description,
      "relevant" => relevant,
      "author_type" => author_type,
      "topics" => visual_labels.first(12),
      "detected_face_count" => detected_face_count,
      "visual_signal_count" => visual_labels.length,
      "sentiment" => "unknown",
      "suggested_actions" => actions,
      "recommended_next_action" => actions.first || "review",
      "engagement_score" => visual_labels.any? ? 0.6 : 0.2,
      # Fallback parse of the generator's raw response; inline rescue
      # swallows JSON/nil errors and yields [].
      "comment_suggestions" => comment_generation[:comment_suggestions] ||
        (JSON.parse(comment_generation[:raw][:response])&.dig("comment_suggestions") rescue []),
      "comment_generation_status" => comment_generation[:status],
      "comment_generation_source" => comment_generation[:source],
      "comment_generation_fallback_used" => ActiveModel::Type::Boolean.new.cast(comment_generation[:fallback_used]),
      "comment_generation_error" => comment_generation[:error_message].to_s.presence,
      "personalization_tokens" => visual_labels.first(5),
      "video_processing_mode" => mode_for(media_hash: media_hash, raw: raw),
      "video_static_detected" => static_video_detected?(media_hash: media_hash, raw: raw),
      "confidence" => visual_labels.any? ? 0.65 : 0.2,
      "evidence" => visual_labels.any? ? "Local AI visual signals: #{visual_labels.first(6).join(', ')}" : "No verified visual signals detected; comment generation skipped"
    }
  }
end
-
-
private
-
-
# Memoized client for the local vision/video microservice.
def client
  @client ||= Ai::LocalMicroserviceClient.new
end
-
-
# Memoized client for the local Ollama text-generation service.
def ollama_client
  @ollama_client ||= Ai::OllamaClient.new
end
-
-
# Translates provider options into Vision-style feature request rows.
# Always falls back to label detection so an empty feature list is
# never sent to the microservice.
def image_features(provider_options = {})
  options = normalize_provider_options(provider_options)
  wanted = image_feature_types_for_options(options: options)

  features = []
  features << { type: "LABEL_DETECTION", maxResults: 15 } if wanted.include?("LABEL_DETECTION")
  features << { type: "TEXT_DETECTION", maxResults: 10 } if wanted.include?("TEXT_DETECTION")
  features << { type: "FACE_DETECTION", maxResults: 8 } if wanted.include?("FACE_DETECTION")
  features = [ { type: "LABEL_DETECTION", maxResults: 15 } ] if features.empty?
  features
end
-
-
# Image feature set: labels always, OCR and faces only when enabled.
def image_feature_types_for_options(options:)
  types = [ "LABEL_DETECTION" ]
  types << "TEXT_DETECTION" if options[:include_ocr]
  types << "FACE_DETECTION" if options[:include_faces]
  types.uniq
end
-
-
# Video feature set; empty when video analysis is disabled entirely.
def video_feature_types_for_options(options:)
  return [] unless options[:include_video_analysis]

  base = [ "LABEL_DETECTION", "SHOT_CHANGE_DETECTION" ]
  base << "FACE_DETECTION" if options[:include_faces]
  base.uniq
end
-
-
# Normalizes caller-supplied provider options into a fully-populated,
# symbol-keyed hash, accepting symbol or string keys throughout.
# `visual_only: true` flips the default for OCR and face detection to
# false; explicit keys always win over defaults.
#
# Refactor: the key-presence/cast/default logic was copy-pasted four
# times; it now lives in provider_option_flag/boolean_cast so the four
# flags cannot drift apart.
def normalize_provider_options(provider_options)
  raw = provider_options.is_a?(Hash) ? provider_options : {}
  visual_only = boolean_cast(raw[:visual_only] || raw["visual_only"])

  {
    visual_only: visual_only,
    include_faces: provider_option_flag(raw, :include_faces, default: !visual_only),
    include_ocr: provider_option_flag(raw, :include_ocr, default: !visual_only),
    include_comment_generation: provider_option_flag(raw, :include_comment_generation, default: true),
    include_video_analysis: provider_option_flag(raw, :include_video_analysis, default: true)
  }
end

# Reads +key+ (symbol or string form) from +raw+ and boolean-casts it;
# returns +default+ when the key is absent under both forms.
def provider_option_flag(raw, key, default:)
  return default unless raw.key?(key) || raw.key?(key.to_s)

  boolean_cast(raw[key] || raw[key.to_s])
end

# Single place for the ActiveModel boolean cast used for all flags.
def boolean_cast(value)
  ActiveModel::Type::Boolean.new.cast(value)
end
-
-
# Result stub returned when provider options disable comment generation.
def comment_generation_disabled_result
  result = { model: ollama_model, raw: {} }
  result[:source] = "policy"
  result[:status] = "disabled_by_provider_options"
  result[:fallback_used] = false
  result[:error_message] = nil
  result[:comment_suggestions] = []
  result
end
-
-
# Decides whether a video is effectively a static single frame or a
# real dynamic video, via the injected frame-change detector.
# Always returns a Hash; missing bytes, invalid detector output, and
# detector errors all degrade to "dynamic_video" with a reason.
def classify_video_processing(media)
  bytes = media[:bytes]
  if bytes.blank?
    return {
      processing_mode: "dynamic_video",
      frame_bytes: nil,
      frame_content_type: nil,
      metadata: { reason: "video_bytes_missing" }
    }
  end

  verdict = @video_frame_change_detector_service.classify(
    video_bytes: bytes,
    reference_id: media[:reference_id].to_s.presence || "post_media",
    content_type: media[:content_type]
  )
  return verdict if verdict.is_a?(Hash)

  { processing_mode: "dynamic_video", metadata: { reason: "frame_change_detector_invalid_result" } }
rescue StandardError => e
  {
    processing_mode: "dynamic_video",
    frame_bytes: nil,
    frame_content_type: nil,
    metadata: {
      reason: "frame_change_detection_failed",
      error_class: e.class.name,
      error_message: normalize_error_message(e.message)
    }
  }
end
-
-
# Processing-mode label for video posts; nil for non-video media.
def mode_for(media_hash:, raw:)
  return nil unless media_hash[:type].to_s == "video"

  recorded = raw.dig(:video_processing, :processing_mode).to_s
  recorded.presence || "dynamic_video"
end
-
-
# True only for video posts that were classified as static frames.
def static_video_detected?(media_hash:, raw:)
  media_hash[:type].to_s == "video" &&
    raw.dig(:video_processing, :processing_mode).to_s == "static_image"
end
-
-
# Routes an image to the microservice: raw bytes take precedence over
# a URL; returns {} when neither usable form is present.
def analyze_image_media(media, provider_options: {})
  if media[:bytes].present?
    # Binary-encode string payloads so transport doesn't mangle them.
    payload = media[:bytes]
    payload = payload.force_encoding("BINARY") if payload.is_a?(String)
    client.analyze_image_bytes!(payload, features: image_features(provider_options))
  elsif media[:url].to_s.start_with?("http://", "https://")
    client.analyze_image_uri!(media[:url], features: image_features(provider_options))
  else
    {}
  end
end
-
-
# Sends video bytes for analysis. Short-circuits with an empty
# annotation payload when options disable video analysis; raises when
# bytes are unavailable.
def analyze_video_media(media, provider_options: {})
  features = video_feature_types_for_options(options: normalize_provider_options(provider_options))
  return { response: { annotationResults: [ {} ] } } if features.empty?

  bytes = media[:bytes]
  raise "Video blob unavailable" if bytes.blank?

  client.analyze_video_bytes!(bytes, features: features)
end
-
-
# Flattens a vision response into lowercase labels. Adds "person"
# when any face was annotated and keeps at most two OCR text snippets.
def extract_image_labels(vision_response)
  labels = Array(vision_response["labelAnnotations"]).map { |row| row["description"].to_s.downcase.strip }.reject(&:blank?)
  texts = Array(vision_response["textAnnotations"]).map { |row| row["description"].to_s.downcase.strip }.reject(&:blank?)
  labels << "person" unless Array(vision_response["faceAnnotations"]).empty?
  (labels + texts.first(2)).uniq
end
-
-
# Lowercased, de-duplicated entity labels from the first annotation
# result (segment- and shot-level combined).
def extract_video_labels(video_response)
  annotation = video_response.dig("response", "annotationResults", 0)
  rows = Array(annotation&.dig("segmentLabelAnnotations")) + Array(annotation&.dig("shotLabelAnnotations"))
  rows.map { |row| row.dig("entity", "description").to_s.downcase.strip }.reject(&:blank?).uniq
end
-
-
# Maps profile tags to a coarse author category, most specific first:
# relative > friend > page > personal_user > unknown.
def infer_author_type(tags)
  friend_markers = [ "friend", "female_friend", "male_friend" ]

  if tags.include?("relative")
    "relative"
  elsif friend_markers.any? { |marker| tags.include?(marker) }
    "friend"
  elsif tags.include?("page")
    "page"
  elsif tags.include?("personal_user")
    "personal_user"
  else
    "unknown"
  end
end
-
-
# Canned fallback comment suggestions. Returns [] only when neither a
# description nor a top label exists to anchor on; the first template
# interpolates the top label.
def build_comment_suggestions(labels:, description:)
  desc = description.to_s.strip
  anchor = labels.first.to_s.strip.presence
  return [] if desc.blank? && anchor.blank?

  [
    "Okay this #{anchor} is elite 🔥",
    "This whole vibe is so clean, love it ✨",
    "Not gonna lie this one ate 👏",
    "The energy here is immaculate fr 😮‍💨",
    "This is super engaging, big fan 🙌"
  ]
end
-
-
# Runs the Ollama-backed comment generator. When the generator reports
# an error in its result, degrades to canned suggestions instead of
# surfacing the failure.
def generate_engagement_comments(post_payload:, image_description:, labels:, author_type:)
  generator = Ai::LocalEngagementCommentGenerator.new(
    ollama_client: ollama_client,
    model: ollama_model
  )

  result = generator.generate!(
    post_payload: post_payload,
    image_description: image_description.to_s,
    topics: labels.first(12),
    author_type: author_type,
    historical_comments: extract_historical_comments(post_payload),
    historical_context: extract_historical_context(post_payload)
  )

  return result unless result[:error_message].present?

  {
    model: ollama_model,
    raw: {},
    source: "fallback",
    status: "error_fallback",
    fallback_used: true,
    error_message: result[:error_message],
    comment_suggestions: build_comment_suggestions(labels: labels, description: image_description)
  }
end
-
-
# Wraps generate_engagement_comments so that any raised error is
# recorded as a provider warning and replaced by canned suggestions.
#
# Fix: the timer was previously started INSIDE the rescue clause, so
# Ai::ApiUsageTracker always saw a ~0s elapsed duration for failures.
# The clock now starts before the generator runs.
def generate_engagement_comments_with_fallback(post_payload:, image_description:, labels:, author_type:)
  started_at = monotonic_started_at
  generate_engagement_comments(
    post_payload: post_payload,
    image_description: image_description,
    labels: labels,
    author_type: author_type
  )
rescue StandardError => e
  warning = {
    stage: "comment_generation",
    media_type: "post",
    error_class: e.class.name,
    error_message: normalize_error_message(e.message)
  }
  record_provider_warning!(
    warning: warning,
    started_at: started_at,
    category: "text_generation"
  )
  {
    model: ollama_model,
    raw: {},
    source: "fallback",
    status: "error_fallback",
    fallback_used: true,
    error_message: warning[:error_message],
    comment_suggestions: build_comment_suggestions(labels: labels, description: image_description)
  }
end
-
-
# Normalizes labels and strips the error-marker entries, leaving only
# real, de-duplicated visual signals.
def meaningful_visual_labels(labels)
  cleaned = Array(labels).map { |label| label.to_s.downcase.strip }
  cleaned.reject(&:blank?).reject do |label|
    label.start_with?("image_analysis_error:", "video_analysis_error:")
  end.uniq
end
-
-
# Face count from the raw vision payload. Tolerates string or symbol
# "faceAnnotations" keys and returns 0 on any error.
def extract_face_count_from_raw(raw)
  by_string_key = Array(raw.dig(:vision, "faceAnnotations")).length
  return by_string_key if by_string_key.positive?

  Array(raw.dig(:vision, :faceAnnotations)).length
rescue StandardError
  0
end
-
-
# Human-readable explanation for why no visual description exists.
# Prefers the recorded analysis warning (with a truncated detail);
# otherwise falls back to a media-type-specific message.
def unavailable_visual_description(raw:, media_type:)
  warning = raw[:vision_warning] || raw[:video_warning]
  if warning.is_a?(Hash)
    detail = warning[:error_message].to_s.presence || warning["error_message"].to_s.presence || "analysis_error"
    return "Visual analysis unavailable (#{detail.byteslice(0, 120)})."
  end

  case media_type.to_s
  when "image" then "Image analysis unavailable or returned no verifiable visual signals."
  when "video" then "Video analysis unavailable or returned no verifiable visual signals."
  else "No image or video content available for visual description."
  end
end
-
-
# Result stub used when no verified visual signals exist, so comment
# generation is skipped by policy rather than attempted blind.
def skipped_comment_generation_for_missing_visuals(raw:, media_type:)
  result = { model: ollama_model, raw: {} }
  result[:source] = "policy"
  result[:status] = "skipped_no_visual_signals"
  result[:fallback_used] = false
  result[:error_message] = unavailable_visual_description(raw: raw, media_type: media_type)
  result[:comment_suggestions] = []
  result
end
-
-
# Runs a media-analysis block without ever raising: returns
# [payload, nil] on success and [{}, warning] on failure, after
# recording the failure for ops visibility.
def safe_media_analysis(stage:, media_type:)
  started_at = monotonic_started_at
  [ yield, nil ]
rescue StandardError => e
  warning = {
    stage: stage.to_s,
    media_type: media_type.to_s,
    error_class: e.class.name,
    error_message: normalize_error_message(e.message)
  }
  failure_category = stage.to_s == "video_analysis" ? "video_analysis" : "image_analysis"
  record_provider_warning!(
    warning: warning,
    started_at: started_at,
    category: failure_category
  )
  [ {}, warning ]
end
-
-
# "prefix:ErrorClass" marker label appended to the label list when a
# media-analysis stage fails.
def warning_label_for_error(error_class, prefix:)
  "#{prefix}:#{error_class.to_s.presence || 'UnknownError'}"
end
-
-
# Best-effort telemetry for a provider failure: structured log plus an
# API-usage failure record. Swallows its own errors — observability
# must never break the analysis path.
def record_provider_warning!(warning:, started_at:, category:)
  payload = warning.to_h.merge(provider: key)

  Ops::StructuredLogger.warn(
    event: "ai.local_provider.fallback",
    payload: payload
  )

  Ai::ApiUsageTracker.track_failure(
    provider: "local_ai_stack",
    operation: warning[:stage].to_s.presence || "unknown_stage",
    category: category,
    started_at: started_at,
    error: "#{warning[:error_class]}: #{warning[:error_message]}",
    metadata: payload
  )
rescue StandardError
  nil
end
-
-
# Monotonic timestamp for duration measurement; falls back to wall
# clock seconds if the monotonic clock is unavailable.
def monotonic_started_at
  Process.clock_gettime(Process::CLOCK_MONOTONIC)
rescue StandardError
  Time.current.to_f
end
-
-
# Compact, never-empty error text, capped at 280 bytes.
def normalize_error_message(message)
  text = message.to_s.strip
  text = "unknown error" if text.blank?
  text.byteslice(0, 280)
end
-
-
# Resolves the Ollama model name once: per-provider setting beats
# Rails credentials, with "mistral:7b" as the final default.
def ollama_model
  @ollama_model ||= begin
    configured = setting&.config_value("ollama_model").to_s
    credentialed = Rails.application.credentials.dig(:ollama, :model).to_s
    configured.presence || credentialed.presence || "mistral:7b"
  end
end
-
-
# Pulls previously sent story comments out of the nested engagement
# history in the post payload. Tolerates symbol or string keys and
# returns [] on any structural surprise.
def extract_historical_comments(post_payload)
  rows = Array(post_payload.dig(:rules, :engagement_history, :prior_story_items))
  rows.filter_map do |row|
    row = row.to_h if row.respond_to?(:to_h)
    next unless row.is_a?(Hash)

    row[:sent_comment].to_s.presence || row["sent_comment"].to_s.presence
  end
rescue StandardError
  []
end
-
-
# Fetches the precomputed narrative text from the payload, or "" when
# the payload is missing or malformed.
def extract_historical_context(post_payload)
  narrative = post_payload.dig(:rules, :historical_narrative_text)
  narrative.to_s
rescue StandardError
  ""
end
-
-
# Summarizes Vision-style API output into a short description: the top
# five labels plus any OCR'd text (newlines flattened, capped at 120
# bytes), with a generic fallback when nothing was detected.
def build_image_description_from_vision(vision, labels:)
  sentences = []

  top_labels = labels.first(5)
  sentences << "Likely shows: #{top_labels.join(', ')}." if top_labels.any?

  text = Array(vision["textAnnotations"]).first&.dig("description").to_s.strip
  if text.present?
    flattened = text.tr("\n", " ").byteslice(0, 120)
    sentences << "Visible text: #{flattened}."
  end

  sentences.join(" ").strip.presence ||
    "Image content appears visually clear but limited contextual details were detected."
end
-
-
# Describes video content from up to six detected frame labels, with a
# generic fallback sentence when no labels were detected.
def build_image_description_from_video(video, labels:)
  top = labels.first(6)
  if top.empty?
    "Video content analyzed with local AI models."
  else
    "Video frames indicate: #{top.join(', ')}."
  end
end
-
-
# Rough tone classification from surface cues: exclamation marks win
# over casual slang words; everything else is neutral.
def infer_tone(text)
  if text.include?("!")
    "enthusiastic"
  elsif text.match?(/\b(hey|yo|lol|omg)\b/)
    "casual"
  else
    "neutral"
  end
end
-
-
# Marks text "formal" when polite markers appear, otherwise "casual".
def infer_formality(text)
  formal_markers = /\b(please|thanks|regards)\b/
  text.match?(formal_markers) ? "formal" : "casual"
end
-
-
# Estimates slang density: "medium" when common slang tokens appear,
# otherwise "low".
def infer_slang(text)
  slangy = text.match?(/\b(lol|lmao|bro|fam|idk|tbh)\b/)
  slangy ? "medium" : "low"
end
-
-
# Extracts a self-declared two-digit age from phrases like "I'm 24" or
# "i am 30" (case-insensitive). Returns nil when no such phrase exists.
def extract_age(text)
  match = text.match(/\b(i am|i'm)\s+(\d{2})\b/i)
  match && match[2].to_i
end
-
-
# Detects an explicit pronoun declaration ("she/her", "he/him",
# "they/them"), tolerating spaces around the slash. Returns the
# canonical form, or nil when none is declared. Check order matches
# the original: she/her first, then he/him, then they/them.
def extract_pronouns(text)
  {
    "she/her" => /\bshe\s*\/\s*her\b/i,
    "he/him" => /\bhe\s*\/\s*him\b/i,
    "they/them" => /\bthey\s*\/\s*them\b/i
  }.each do |pronouns, pattern|
    return pronouns if text.match?(pattern)
  end

  nil
end
-
-
# Produces coarse demographic guesses (age, gender, location) from the
# assembled profile text, bio, and vision labels, with explicit
# per-field confidence values. All values are heuristics, not facts.
# NOTE(review): the regexes only match lowercase — assumes `text` is
# pre-downcased by the caller; confirm.
def infer_demographic_estimates(text:, bio:, labels:)
  # Age: an explicit "i am NN" in the bio wins; otherwise bucket by
  # life-stage keywords, defaulting to 26.
  age =
    extract_age(bio) ||
    if text.match?(/\b(high school|class of 20\d{2})\b/)
      17
    elsif text.match?(/\b(student|college|university|campus)\b/)
      21
    elsif text.match?(/\b(mom|dad|parent)\b/)
      34
    else
      26
    end

  # Gender: pronoun declarations and gendered titles/keywords.
  gender =
    if text.match?(/\b(she\/her|she her|woman|girl|mrs|ms)\b/)
      "female"
    elsif text.match?(/\b(he\/him|he him|man|boy|mr)\b/)
      "male"
    elsif text.match?(/\b(they\/them|non[- ]?binary)\b/)
      "non-binary"
    else
      "unknown"
    end

  # Location: a pin-emoji / "based in" / "from" phrase (trailing "|" or
  # bullet separators dropped, title-cased), else a coarse country
  # keyword match.
  location =
    if (m = text.match(/(?:📍|based in|from)\s+([a-z][a-z\s,.-]{2,40})/))
      m[1].to_s.split(/[|•]/).first.to_s.strip.titleize
    elsif text.match?(/\b(usa|us|united states)\b/)
      "United States"
    elsif text.match?(/\b(india|indian|hindi)\b/)
      "India"
    else
      "unknown"
    end

  {
    age: age,
    # Higher confidence only when the age was explicitly self-declared.
    age_confidence: extract_age(bio).present? ? 0.75 : 0.3,
    gender: gender,
    gender_confidence: gender == "unknown" ? 0.2 : 0.35,
    location: location,
    location_confidence: location == "unknown" ? 0.2 : 0.35,
    evidence: "Estimated from bio/text pronouns, language hints, and local AI vision labels: #{Array(labels).first(4).join(', ')}"
  }
end
-
end
-
end
-
end
-
require "digest"
-
require "uri"
-
-
module Ai
  # Orchestrates AI analyses for one Instagram account: selects enabled
  # providers (respecting daily limits and priority), reuses cached
  # results for identical post media, records every attempt as an
  # AiAnalysis row, and fails over across providers until one succeeds.
  class Runner
    # @param account [InstagramAccount] the account all analyses are scoped to
    def initialize(account:)
      @account = account
    end

    # Runs an analysis for +purpose+ ("profile" or "post").
    #
    # @param analyzable [ApplicationRecord] polymorphic analysis target
    # @param payload [Hash] structured input handed to the provider
    # @param media [Hash, Array<Hash>, nil] media descriptor(s)
    # @param media_fingerprint [String, nil] precomputed fingerprint; computed
    #   from +media+ when absent (posts only)
    # @param allow_cached [Boolean] reuse a prior succeeded analysis with the
    #   same fingerprint (posts only)
    # @param provider_options [Hash] passed through to the provider
    # @return [Hash] { record:, result:, provider: } (plus cached: true on reuse)
    # @raise [RuntimeError] when every candidate provider fails
    def analyze!(
      purpose:,
      analyzable:,
      payload:,
      media: nil,
      media_fingerprint: nil,
      allow_cached: true,
      provider_options: {}
    )
      # Only post analyses are fingerprinted/cached by media content.
      fingerprint = if purpose == "post"
        media_fingerprint.to_s.presence || compute_media_fingerprint(media)
      end

      if allow_cached && purpose == "post"
        cached = reusable_analysis_for(purpose: purpose, media_fingerprint: fingerprint)
        return build_cached_run(cached: cached, analyzable: analyzable, purpose: purpose, payload: payload, media_fingerprint: fingerprint) if cached
      end

      candidates = candidate_providers(purpose: purpose, media: media)
      failures = []

      candidates.each do |provider|
        # Each attempt gets its own row so failed providers remain auditable.
        analysis = AiAnalysis.create!(
          instagram_account: @account,
          analyzable: analyzable,
          purpose: purpose,
          provider: provider.key,
          model: provider.preferred_model.presence,
          status: "running",
          started_at: Time.current,
          media_fingerprint: fingerprint,
          metadata: {
            provider_display_name: provider.display_name,
            provider_options: (provider_options.is_a?(Hash) ? provider_options : {})
          }
        )

        begin
          result = Ai::ApiUsageTracker.with_context(instagram_account_id: @account.id, workflow: "ai_runner", purpose: purpose) do
            case purpose
            when "profile"
              provider.analyze_profile!(profile_payload: payload, media: media)
            when "post"
              provider.analyze_post!(post_payload: payload, media: media, provider_options: provider_options)
            else
              raise "Unsupported AI purpose: #{purpose}"
            end
          end

          analysis.update!(
            model: result[:model].presence || analysis.model,
            status: "succeeded",
            finished_at: Time.current,
            prompt: JSON.generate(result[:prompt] || {}),
            response_text: result[:response_text].to_s,
            analysis: result[:analysis],
            input_completeness_score: input_completeness_score(payload),
            confidence_score: confidence_score(purpose: purpose, analysis: result[:analysis]),
            evidence_count: evidence_count(purpose: purpose, analysis: result[:analysis]),
            signals_detected_count: signals_detected_count(purpose: purpose, analysis: result[:analysis]),
            prompt_version: "v1",
            schema_version: schema_version_for(purpose: purpose),
            metadata: (analysis.metadata || {}).merge(
              cache_hit: false,
              raw: result[:response_raw]
            ),
            error_message: nil
          )

          sync_materialized_insights!(purpose: purpose, analysis_record: analysis, payload: payload, analysis_hash: result[:analysis])

          return { record: analysis, result: result, provider: provider }
        rescue StandardError => e
          # Mark this attempt failed and fall through to the next provider.
          analysis.update!(status: "failed", finished_at: Time.current, error_message: e.message.to_s)
          failures << "#{provider.display_name}: #{e.message}"
        end
      end

      raise "All enabled AI providers failed. #{failures.join(' | ')}"
    end

    private

    # Enabled providers that are available, under their daily limit, and
    # support this purpose/media combination, in priority order.
    def candidate_providers(purpose:, media:)
      settings = Ai::ProviderRegistry.enabled_settings.to_a
      raise "No AI providers are enabled. Configure one in Admin > AI Providers." if settings.empty?

      settings = filter_settings_by_daily_limit(settings: settings, purpose: purpose)
      candidates = settings.filter_map do |setting|
        provider = Ai::ProviderRegistry.build_provider(setting.provider, setting: setting)
        next nil unless provider.available?
        next nil unless supports_purpose?(provider, purpose: purpose, media: media)

        provider
      end

      raise "No enabled AI provider supports this analysis type." if candidates.empty?
      candidates
    end

    # A prior succeeded analysis reusable for this fingerprint, excluding
    # legacy post payloads produced before visual-signal verification.
    def reusable_analysis_for(purpose:, media_fingerprint:)
      return nil if media_fingerprint.blank?

      candidate = AiAnalysis.reusable_for(purpose: purpose, media_fingerprint: media_fingerprint).first
      return nil unless candidate
      return nil if purpose == "post" && legacy_post_comment_generation_payload?(candidate.analysis)

      candidate
    end

    # Detects pre-visual-signal-schema comment payloads that must not be
    # served from cache.
    def legacy_post_comment_generation_payload?(analysis_hash)
      return false unless analysis_hash.is_a?(Hash)
      return false unless analysis_hash.key?("comment_suggestions")
      return true if analysis_hash["comment_generation_status"].to_s == "error_fallback"
      return true if analysis_hash["comment_generation_status"].to_s.blank?
      return true if analysis_hash["evidence"].to_s.include?("No labels detected; used tag rules only")
      return true unless analysis_hash.key?("visual_signal_count")

      false
    end

    # Materializes a cache hit as a fresh AiAnalysis row pointing back at
    # the reused record, and returns the same shape as a live run.
    def build_cached_run(cached:, analyzable:, purpose:, payload:, media_fingerprint:)
      provider = provider_for_key(cached.provider)
      now = Time.current

      record = AiAnalysis.create!(
        instagram_account: @account,
        analyzable: analyzable,
        purpose: purpose,
        provider: cached.provider,
        model: cached.model,
        status: "succeeded",
        started_at: now,
        finished_at: now,
        prompt: cached.prompt,
        response_text: cached.response_text,
        analysis: cached.analysis,
        input_completeness_score: input_completeness_score(payload),
        confidence_score: cached.confidence_score,
        evidence_count: cached.evidence_count,
        signals_detected_count: cached.signals_detected_count,
        prompt_version: cached.prompt_version,
        schema_version: cached.schema_version,
        media_fingerprint: media_fingerprint,
        cache_hit: true,
        cached_from_ai_analysis_id: cached.id,
        metadata: (cached.metadata || {}).merge(
          cache_hit: true,
          reused_from_ai_analysis_id: cached.id,
          reused_at: now.iso8601
        )
      )

      sync_materialized_insights!(purpose: purpose, analysis_record: record, payload: payload, analysis_hash: cached.analysis)

      {
        record: record,
        result: {
          model: cached.model,
          prompt: parsed_json_or_hash(cached.prompt),
          response_text: cached.response_text.to_s,
          response_raw: cached.metadata,
          analysis: cached.analysis
        },
        provider: provider,
        cached: true
      }
    end

    # Parses a stored JSON prompt back to a Hash; {} on any parse failure.
    def parsed_json_or_hash(value)
      return value if value.is_a?(Hash)

      JSON.parse(value.to_s)
    rescue StandardError
      {}
    end

    # Builds a provider for display purposes; falls back to a lightweight
    # stand-in exposing only key/display_name when construction fails.
    def provider_for_key(provider_key)
      Ai::ProviderRegistry.build_provider(provider_key)
    rescue StandardError
      Struct.new(:key, :display_name).new(provider_key.to_s, provider_key.to_s.humanize)
    end

    # Drops providers that exhausted their configured daily limit and
    # orders the rest by priority, then utilization, then name.
    def filter_settings_by_daily_limit(settings:, purpose:)
      todays_counts = AiAnalysis.where(purpose: purpose, status: "succeeded")
        .where(created_at: Time.current.all_day)
        .group(:provider)
        .count

      with_load = settings.map do |setting|
        limit = integer_or_nil(setting.config_value("daily_limit"))
        used = todays_counts[setting.provider].to_i
        utilization = limit.to_i.positive? ? (used.to_f / limit.to_f) : 0.0
        [ setting, limit, used, utilization ]
      end

      available = with_load.reject { |_setting, limit, used, _utilization| limit.to_i.positive? && used >= limit }
      sorted = available.sort_by { |setting, _limit, _used, utilization| [ setting.priority.to_i, utilization, setting.provider ] }
      sorted.map(&:first)
    end

    # Strict integer coercion; nil for blank or non-integer input.
    def integer_or_nil(value)
      return nil if value.blank?

      Integer(value)
    rescue StandardError
      nil
    end

    # SHA-256 fingerprint of the first media item: raw bytes, then data
    # URL, then normalized URL. nil when nothing identifiable exists.
    def compute_media_fingerprint(media)
      item = media.is_a?(Array) ? media.first : media
      return nil unless item.is_a?(Hash)

      bytes = item[:bytes] || item["bytes"]
      return Digest::SHA256.hexdigest(bytes) if bytes.present?

      data_url = item[:image_data_url] || item["image_data_url"]
      return Digest::SHA256.hexdigest(data_url.to_s) if data_url.present?

      url = item[:url] || item["url"]
      normalized = normalize_url(url)
      return Digest::SHA256.hexdigest(normalized) if normalized.present?

      nil
    end

    # Strips query/fragment from HTTP(S) URLs so transient CDN params do
    # not change the fingerprint; returns non-HTTP values unchanged.
    def normalize_url(raw)
      value = raw.to_s.strip
      return nil if value.blank?

      uri = URI.parse(value)
      return value unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)

      "#{uri.scheme}://#{uri.host}#{uri.path}"
    rescue StandardError
      value
    end

    # Whether the provider can handle this purpose, distinguishing post
    # video from post image support.
    def supports_purpose?(provider, purpose:, media:)
      return provider.supports_profile? if purpose == "profile"

      return false unless purpose == "post"

      type = media.is_a?(Hash) ? media[:type].to_s : ""
      return provider.supports_post_video? if type == "video"

      provider.supports_post_image?
    end

    # Mirrors the analysis hash into the materialized insight tables.
    def sync_materialized_insights!(purpose:, analysis_record:, payload:, analysis_hash:)
      return unless analysis_hash.is_a?(Hash)

      case purpose
      when "profile"
        Ai::InsightSync.sync_profile!(analysis_record: analysis_record, payload: payload, analysis_hash: analysis_hash)
      when "post"
        Ai::InsightSync.sync_post!(analysis_record: analysis_record, analysis_hash: analysis_hash)
      end
    end

    # Schema version string persisted with each analysis.
    def schema_version_for(purpose:)
      case purpose
      when "profile" then "profile_insights_v2"
      when "post" then "post_insights_v2"
      else "unknown"
      end
    end

    # Fraction of leaf values in the payload that are present, in [0, 1];
    # nil for an empty payload.
    def input_completeness_score(payload)
      total = 0
      present = 0
      walk_payload(payload) do |value|
        total += 1
        present += 1 if value.present?
      end
      return nil if total <= 0

      (present.to_f / total).round(4)
    end

    # Depth-first visit of every leaf value; empty arrays count as one
    # missing leaf.
    def walk_payload(value, &block)
      case value
      when Hash
        value.each_value { |v| walk_payload(v, &block) }
      when Array
        if value.empty?
          block.call(nil)
        else
          value.each { |v| walk_payload(v, &block) }
        end
      else
        block.call(value)
      end
    end

    # Confidence for the run. Posts use the provider-reported
    # "confidence" clamped to [0, 1] (nil when absent/non-numeric);
    # profiles derive a score from evidence volume.
    #
    # Fix: the previous `return Float(val).clamp(0.0, 1.0) rescue nil`
    # never executed the `return` when Float raised (the rescue modifier
    # swallows it), so posts with a missing or non-numeric confidence
    # fell through to the profile languages/likes scoring below.
    def confidence_score(purpose:, analysis:)
      return nil unless analysis.is_a?(Hash)

      if purpose == "post"
        value = Float(analysis["confidence"], exception: false)
        return value&.clamp(0.0, 1.0)
      end

      langs = Array(analysis["languages"]).size
      likes = Array(analysis["likes"]).size
      ([(langs * 0.1) + (likes * 0.05), 1.0].min).round(4)
    end

    # Count of evidence items: posts count the evidence string plus
    # topics; profiles count languages, likes, dislikes, and notes.
    def evidence_count(purpose:, analysis:)
      return 0 unless analysis.is_a?(Hash)

      if purpose == "post"
        count = 0
        count += 1 if analysis["evidence"].to_s.present?
        count += Array(analysis["topics"]).size
        return count
      end

      count = 0
      count += Array(analysis["languages"]).size
      count += Array(analysis["likes"]).size
      count += Array(analysis["dislikes"]).size
      count += 1 if analysis["confidence_notes"].to_s.present?
      count
    end

    # Count of detected signals: posts count topics plus suggested
    # actions; profiles count languages, likes, and present self-declared
    # fields.
    def signals_detected_count(purpose:, analysis:)
      return 0 unless analysis.is_a?(Hash)

      if purpose == "post"
        return Array(analysis["topics"]).size + Array(analysis["suggested_actions"]).size
      end

      self_declared = analysis["self_declared"].is_a?(Hash) ? analysis["self_declared"] : {}
      declared_count = self_declared.values.count(&:present?)

      Array(analysis["languages"]).size + Array(analysis["likes"]).size + declared_count
    end
  end
end
-
module Ai
-
class VerifiedStoryInsightBuilder
-
# Minimum per-block OCR confidence accepted as evidence (0 means the
# detector reported no confidence and is kept).
MIN_OCR_BLOCK_CONFIDENCE = 0.35
# Minimum object-detection confidence accepted as evidence.
MIN_OBJECT_CONFIDENCE = 0.30
# Verified-signal score required before a comment may be generated.
MIN_SIGNAL_SCORE_FOR_COMMENT = 3
# Identity confidence needed to treat content as owner-aligned.
MIN_OWNER_ALIGNMENT_CONFIDENCE = 0.58

# "@handle" mentions found in OCR text, captions, or transcripts.
OCR_USERNAME_REGEX = /@([a-zA-Z0-9._]{2,30})/
# Bare tokens that may be usernames (no "@" prefix).
BARE_USERNAME_REGEX = /\b([a-zA-Z0-9._]{3,30})\b/
# Phrases suggesting the story is reshared/credited third-party content.
RESHARE_PATTERNS = [
  /\brepost\b/i,
  /\breshare\b/i,
  /\bshared\s+from\b/i,
  /\bvia\s+@?[a-z0-9._]+\b/i,
  /\bcredit(?:s)?\b/i,
  /\boriginal\s+by\b/i
].freeze
# Phrases typical of meme-template text overlays.
MEME_PATTERNS = [
  /\bmemes?\b/i,
  /\bi know nobody gave you\b/i,
  /\bdon'?t worry\b/i,
  /\bwhen you\b/i
].freeze
# Instagram URL path segments that are never usernames.
RESERVED_IG_SEGMENTS = %w[stories p reel reels tv explore accounts direct v].freeze
-
-
# @param profile [#username, nil] profile the story is attributed to
# @param local_story_intelligence [Hash] raw local-AI extraction output
#   (OCR blocks, object detections, faces, ...); non-Hash input is
#   coerced to {}
# @param metadata [Hash] story metadata (caption, URLs, media type, ...);
#   non-Hash input is coerced to {}
def initialize(profile:, local_story_intelligence:, metadata:)
  @profile = profile
  @raw = local_story_intelligence.is_a?(Hash) ? local_story_intelligence : {}
  @metadata = metadata.is_a?(Hash) ? metadata : {}
end
-
-
# Assembles the complete verified-insight document: the extracted
# story facts, an ownership classification derived from them, and the
# resulting comment-generation policy, stamped with a validation time.
def build
  facts = build_verified_story_facts
  ownership = classify_ownership(verified_story_facts: facts)
  policy = build_generation_policy(
    verified_story_facts: facts,
    ownership_classification: ownership
  )

  {
    verified_story_facts: facts,
    ownership_classification: ownership,
    generation_policy: policy,
    validated_at: Time.current.iso8601
  }
end
-
-
private
-
-
# Normalizes every raw local-AI signal (OCR, object detection, scenes,
# faces, mentions, hashtags) into a single capped, deduplicated facts
# hash, augmented with username/reshare/meme detection, an identity
# verification block, and an overall signal score.
def build_verified_story_facts
  ocr_blocks = normalize_ocr_blocks(@raw[:ocr_blocks] || @raw["ocr_blocks"])
  object_detections = normalize_object_detections(@raw[:object_detections] || @raw["object_detections"])
  scenes = normalize_scenes(@raw[:scenes] || @raw["scenes"])

  # Prefer reassembling OCR text from the filtered blocks; fall back to
  # the raw flat text when the blocks yield nothing.
  ocr_text = normalize_text(@raw[:ocr_text] || @raw["ocr_text"], max: 800)
  if ocr_blocks.any?
    ocr_text = ocr_blocks.map { |row| row[:text] }.join("\n").presence || ocr_text
  end

  transcript = normalize_text(@raw[:transcript] || @raw["transcript"], max: 800)
  mentions = normalize_handle_array(@raw[:mentions] || @raw["mentions"], prefix: "@")
  hashtags = normalize_handle_array(@raw[:hashtags] || @raw["hashtags"], prefix: "#")
  objects = normalize_objects(@raw[:objects] || @raw["objects"], object_detections: object_detections)
  topics = normalize_topics(@raw[:topics] || @raw["topics"], objects: objects, hashtags: hashtags)
  faces = normalize_faces
  # Usernames surfaced anywhere: mentions, known handles, OCR,
  # transcript, and story metadata.
  detected_usernames = detect_usernames(
    mentions: mentions,
    profile_handles: @raw[:profile_handles] || @raw["profile_handles"],
    ocr_text: ocr_text,
    transcript: transcript,
    metadata: @metadata
  )
  source_profile_references = extract_source_profile_references(metadata: @metadata)
  source_profile_ids = extract_source_profile_ids(metadata: @metadata)
  reshare_hits = detect_reshare_indicators(
    ocr_text: ocr_text,
    transcript: transcript,
    metadata: @metadata
  )
  meme_markers = detect_meme_markers(
    ocr_text: ocr_text,
    transcript: transcript,
    metadata: @metadata
  )
  identity_verification = build_identity_verification(
    faces: faces,
    topics: topics,
    detected_usernames: detected_usernames,
    source_profile_references: source_profile_references
  )

  signal_score = score_verified_signals(
    ocr_text: ocr_text,
    transcript: transcript,
    objects: objects,
    object_detections: object_detections,
    scenes: scenes,
    hashtags: hashtags,
    mentions: mentions,
    faces: faces
  )

  # All lists are capped to keep the persisted document bounded.
  {
    source: @raw[:source].to_s.presence || @raw["source"].to_s.presence || "unknown",
    reason: @raw[:reason].to_s.presence || @raw["reason"].to_s.presence,
    ocr_text: ocr_text,
    ocr_blocks: ocr_blocks.first(30),
    transcript: transcript,
    object_detections: object_detections.first(30),
    objects: objects.first(20),
    scenes: scenes.first(20),
    hashtags: hashtags.first(20),
    mentions: mentions.first(20),
    profile_handles: Array(@raw[:profile_handles] || @raw["profile_handles"]).map(&:to_s).first(20),
    topics: topics.first(20),
    detected_usernames: detected_usernames.first(20),
    source_profile_references: source_profile_references.first(20),
    source_profile_ids: source_profile_ids.first(20),
    reshare_indicators: reshare_hits.first(12),
    meme_markers: meme_markers.first(12),
    media_type: @metadata["media_type"].to_s.presence,
    faces: faces,
    face_count: faces[:total_count].to_i,
    people: faces[:people].first(12),
    identity_verification: identity_verification,
    signal_score: signal_score
  }
end
-
-
# Classifies whether the story content belongs to the profile owner.
# Produces a label (owned_by_profile / reshare / meme_reshare /
# third_party_content / unrelated_post / insufficient_evidence), an
# allow/skip decision, a confidence value, and the reason codes that
# drove the decision. Branch order below is significant: earlier,
# stronger skip conditions win over later owner-affirming ones.
def classify_ownership(verified_story_facts:)
  profile_username = normalize_username(@profile&.username)
  usernames = Array(verified_story_facts[:detected_usernames]).map { |value| normalize_username(value) }.reject(&:blank?).uniq
  external_usernames = usernames.reject { |value| value == profile_username }
  profile_username_detected = profile_username.present? && usernames.include?(profile_username)
  source_profile_references = Array(verified_story_facts[:source_profile_references]).map { |value| normalize_username(value) }.reject(&:blank?).uniq
  external_source_refs = source_profile_references.reject { |value| value == profile_username }
  face_data = verified_story_facts[:faces].is_a?(Hash) ? verified_story_facts[:faces] : {}
  primary_faces = face_data[:primary_user_count].to_i
  secondary_faces = face_data[:secondary_person_count].to_i
  identity_verification = verified_story_facts[:identity_verification].is_a?(Hash) ? verified_story_facts[:identity_verification] : {}
  owner_likelihood = identity_verification[:owner_likelihood].to_s
  identity_confidence = identity_verification[:confidence].to_f
  # Other people appear but the owner does not — a third-party cue.
  non_primary_faces_without_primary = secondary_faces.positive? && primary_faces <= 0
  reshare_hits = Array(verified_story_facts[:reshare_indicators]).map(&:to_s)
  meme_markers = Array(verified_story_facts[:meme_markers]).map(&:to_s)
  third_party_link = third_party_profile_link_detected?(profile_username: profile_username, metadata: @metadata)
  share_status = infer_share_status(
    profile_username_detected: profile_username_detected,
    external_usernames: external_usernames,
    external_source_refs: external_source_refs,
    reshare_hits: reshare_hits,
    meme_markers: meme_markers
  )

  # Reason codes are accumulated for auditability regardless of which
  # branch ultimately decides the label.
  reason_codes = []
  reason_codes << "external_usernames_detected" if external_usernames.any?
  reason_codes << "external_source_profile_reference_detected" if external_source_refs.any?
  reason_codes << "profile_username_not_detected" if profile_username.present? && !profile_username_detected
  reason_codes << "non_primary_faces_detected" if non_primary_faces_without_primary
  reason_codes << "reshare_indicators_detected" if reshare_hits.any?
  reason_codes << "meme_markers_detected" if meme_markers.any?
  reason_codes << "third_party_profile_link_detected" if third_party_link
  reason_codes << "identity_likelihood_low" if owner_likelihood == "low"
  reason_codes << "identity_likelihood_high" if owner_likelihood == "high"
  reason_codes << "identity_confidence_low" if identity_confidence.positive? && identity_confidence < 0.45
  reason_codes << "share_status_#{share_status}" if share_status != "unknown"

  signal_score = verified_story_facts[:signal_score].to_i
  # Default stance: treat as owner content and allow commenting; the
  # ladder below only downgrades from there.
  label = "owned_by_profile"
  decision = "allow_comment"

  if signal_score < MIN_SIGNAL_SCORE_FOR_COMMENT
    label = "insufficient_evidence"
    decision = "skip_comment"
    reason_codes << "insufficient_verified_signals"
  elsif meme_markers.any? && external_usernames.any?
    label = "meme_reshare"
    decision = "skip_comment"
  elsif meme_markers.any? && !profile_username_detected
    label = "meme_reshare"
    decision = "skip_comment"
  elsif share_status == "reshared" && external_usernames.any?
    label = "reshare"
    decision = "skip_comment"
  elsif reshare_hits.any? || third_party_link
    label = "reshare"
    decision = "skip_comment"
  elsif external_source_refs.any? && !profile_username_detected
    label = "third_party_content"
    decision = "skip_comment"
  elsif external_usernames.any? && !profile_username_detected && non_primary_faces_without_primary
    label = "third_party_content"
    decision = "skip_comment"
  elsif external_usernames.any? && !profile_username_detected && signal_score <= 3
    label = "third_party_content"
    decision = "skip_comment"
  elsif non_primary_faces_without_primary && signal_score <= 2
    label = "unrelated_post"
    decision = "skip_comment"
  elsif owner_likelihood == "low" && (external_usernames.any? || external_source_refs.any? || non_primary_faces_without_primary)
    label = "third_party_content"
    decision = "skip_comment"
  elsif owner_likelihood == "high" && identity_confidence >= MIN_OWNER_ALIGNMENT_CONFIDENCE && share_status == "unknown" && reshare_hits.empty? && meme_markers.empty?
    # Explicit owner confirmation; re-asserts the defaults.
    label = "owned_by_profile"
    decision = "allow_comment"
  end

  {
    label: label,
    decision: decision,
    confidence: ownership_confidence(
      label: label,
      reason_codes: reason_codes,
      signal_score: signal_score
    ),
    reason_codes: reason_codes.uniq,
    profile_username_detected: profile_username_detected,
    share_status: share_status,
    source_profile_references: source_profile_references.first(10),
    source_profile_ids: Array(verified_story_facts[:source_profile_ids]).map(&:to_s).first(10),
    detected_external_usernames: external_usernames.first(10),
    reshare_indicators: reshare_hits.first(10),
    meme_markers: meme_markers.first(10),
    identity_verification: identity_verification,
    face_evidence: {
      primary_user_count: primary_faces,
      secondary_person_count: secondary_faces,
      total_count: face_data[:total_count].to_i
    },
    summary: ownership_summary(
      label: label,
      external_usernames: external_usernames,
      external_source_refs: external_source_refs,
      reshare_hits: reshare_hits,
      meme_markers: meme_markers,
      primary_faces: primary_faces,
      secondary_faces: secondary_faces,
      signal_score: signal_score
    )
  }
end
-
-
# Translates the ownership classification into the final allow/skip
# comment-generation policy, with a machine reason code and a
# human-readable reason string.
def build_generation_policy(verified_story_facts:, ownership_classification:)
  identity = verified_story_facts[:identity_verification]
  identity = {} unless identity.is_a?(Hash)

  allow_comment = ownership_classification[:decision].to_s == "allow_comment"
  # Veto: content nominally owned by the profile whose identity check
  # still came back weak.
  weak_owner_identity =
    ownership_classification[:label].to_s == "owned_by_profile" &&
    identity[:owner_likelihood].to_s == "low" &&
    identity[:confidence].to_f < MIN_OWNER_ALIGNMENT_CONFIDENCE
  allow_comment = false if allow_comment && weak_owner_identity

  if allow_comment
    reason_code = "verified_context_available"
    reason = "Verified context is sufficient for grounded generation."
  else
    reason_code = ownership_classification[:reason_codes].first.to_s.presence || "policy_blocked"
    reason = ownership_classification[:summary].to_s.presence || "Insufficient or irrelevant verified context for safe comment generation."
  end

  {
    allow_comment: allow_comment,
    reason_code: reason_code,
    reason: reason,
    classification: ownership_classification[:label].to_s,
    signal_score: verified_story_facts[:signal_score].to_i,
    minimum_signal_score: MIN_SIGNAL_SCORE_FOR_COMMENT,
    owner_likelihood: identity[:owner_likelihood].to_s,
    identity_confidence: identity[:confidence].to_f.round(2),
    source: "verified_story_insight_builder"
  }
end
-
-
# Normalizes raw OCR rows into {text:, confidence:, source:, timestamp:}
# hashes. Drops non-Hash rows, blank text, and rows below the
# confidence floor (rows reporting 0/absent confidence are kept as
# "unknown confidence"). nil values are compacted away.
def normalize_ocr_blocks(value)
  Array(value).filter_map do |row|
    next unless row.is_a?(Hash)

    text = normalize_text(row[:text] || row["text"], max: 180)
    next if text.blank?

    confidence = (row[:confidence] || row["confidence"]).to_f
    too_weak = confidence.positive? && confidence < MIN_OCR_BLOCK_CONFIDENCE
    next if too_weak

    {
      text: text,
      confidence: confidence,
      source: (row[:source] || row["source"]).to_s.presence || "ocr",
      timestamp: row[:timestamp] || row["timestamp"]
    }.compact
  end
end
-
-
# Normalizes object-detection rows into {label:, confidence:,
# timestamps:}: lower-cases labels, accepts several confidence key
# aliases, drops low-confidence rows, and de-duplicates by
# (label, timestamps).
def normalize_object_detections(value)
  rows = Array(value).filter_map do |row|
    next unless row.is_a?(Hash)

    raw_label = row[:label] || row["label"] || row[:description] || row["description"]
    label = normalize_text(raw_label, max: 80)&.downcase
    next if label.blank?

    raw_confidence = row[:confidence] || row["confidence"] || row[:score] || row["score"] ||
                     row[:max_confidence] || row["max_confidence"]
    confidence = raw_confidence.to_f
    next if confidence.positive? && confidence < MIN_OBJECT_CONFIDENCE

    {
      label: label,
      confidence: confidence,
      timestamps: Array(row[:timestamps] || row["timestamps"]).map(&:to_f).first(20)
    }
  end

  rows.uniq { |row| [row[:label], row[:timestamps]] }
end
-
-
# Normalizes scene rows into {type:, timestamp:, correlation:} hashes,
# lower-casing the type and dropping rows without a usable type string.
# nil values are compacted away.
def normalize_scenes(value)
  Array(value).filter_map do |row|
    next unless row.is_a?(Hash)

    scene_type = normalize_text(row[:type] || row["type"], max: 60)
    next if scene_type.blank?

    {
      type: scene_type.downcase,
      timestamp: row[:timestamp] || row["timestamp"],
      correlation: row[:correlation] || row["correlation"]
    }.compact
  end
end
-
-
# Merges free-form object labels with detection labels into one
# lower-cased, de-duplicated list (max 40 entries).
def normalize_objects(raw_objects, object_detections:)
  freeform = Array(raw_objects).filter_map { |row| normalize_text(row, max: 80)&.downcase }
  detected = Array(object_detections).map { |row| row[:label].to_s.downcase }.reject(&:blank?)
  (freeform + detected).uniq.first(40)
end
-
-
# Builds the topic list from explicit topics, detected objects, and
# hashtag words ("#" stripped), de-duplicated (max 40 entries).
def normalize_topics(raw_topics, objects:, hashtags:)
  explicit = Array(raw_topics).filter_map { |row| normalize_text(row, max: 80)&.downcase }
  tag_words = Array(hashtags).map { |tag| tag.to_s.delete_prefix("#").downcase }
  (explicit + objects + tag_words).reject(&:blank?).uniq.first(40)
end
-
-
# Normalizes detected-people rows from @raw and derives face counts by
# role. Rows without a role are dropped; the reported face_count is
# raised to at least the number of usable rows, and faces that are
# neither primary_user nor secondary_person land in unknown_count.
def normalize_faces
  rows = Array(@raw[:people] || @raw["people"]).filter_map do |row|
    next unless row.is_a?(Hash)

    role = (row[:role] || row["role"]).to_s
    next if role.blank?

    age_value = (row[:age] || row["age"]).to_f
    {
      person_id: row[:person_id] || row["person_id"],
      role: role,
      similarity: (row[:similarity] || row["similarity"]).to_f,
      label: (row[:label] || row["label"]).to_s.presence,
      age: age_value.positive? ? age_value.round(1) : nil,
      age_range: (row[:age_range] || row["age_range"]).to_s.presence,
      gender: (row[:gender] || row["gender"]).to_s.presence,
      gender_score: (row[:gender_score] || row["gender_score"]).to_f
    }.compact
  end

  reported = (@raw[:face_count] || @raw["face_count"]).to_i
  total = [reported, rows.size].max
  primary = rows.count { |row| row[:role].to_s == "primary_user" }
  secondary = rows.count { |row| row[:role].to_s == "secondary_person" }

  {
    total_count: total,
    primary_user_count: primary,
    secondary_person_count: secondary,
    unknown_count: [total - (primary + secondary), 0].max,
    people: rows
  }
end
-
-
# Estimates how likely the story actually shows the profile owner,
# combining face roles, recurrence of the historical primary person,
# username references, bio-topic overlap, and gender/age consistency
# into a clamped confidence score and a high/medium/low likelihood.
def build_identity_verification(faces:, topics:, detected_usernames:, source_profile_references:)
  profile_username = normalize_username(@profile&.username)
  people = faces.is_a?(Hash) ? Array(faces[:people]) : []
  person_ids = people.map { |row| row[:person_id] }.compact
  # Look up known story-people records for the detected person ids,
  # guarding against profiles without that association.
  people_index = if @profile&.respond_to?(:instagram_story_people)
    @profile.instagram_story_people.where(id: person_ids).index_by(&:id)
  else
    {}
  end

  behavior_profile = @profile&.respond_to?(:instagram_profile_behavior_profile) ? @profile.instagram_profile_behavior_profile : nil
  behavior_summary = behavior_profile&.behavioral_summary
  behavior_summary = behavior_summary.is_a?(Hash) ? behavior_summary : {}
  face_identity_profile = behavior_summary["face_identity_profile"].is_a?(Hash) ? behavior_summary["face_identity_profile"] : {}
  historical_primary_person_id = face_identity_profile["person_id"] || face_identity_profile[:person_id]

  primary_person_present = people.any? { |row| row[:role].to_s == "primary_user" }
  # Does the historically-identified primary person appear again here?
  recurring_primary_person = historical_primary_person_id.present? && people.any? { |row| row[:person_id].to_s == historical_primary_person_id.to_s }
  profile_topics = extract_profile_bio_topics
  topic_overlap = (profile_topics & Array(topics).map { |value| value.to_s.downcase.strip }.reject(&:blank?)).first(8)

  normalized_usernames = Array(detected_usernames).map { |value| normalize_username(value) }.reject(&:blank?)
  normalized_refs = Array(source_profile_references).map { |value| normalize_username(value) }.reject(&:blank?)
  profile_username_match = profile_username.present? && (normalized_usernames.include?(profile_username) || normalized_refs.include?(profile_username))
  external_reference_detected = (normalized_usernames + normalized_refs).uniq.any? { |value| value != profile_username }

  gender_consistency, observed_gender = face_gender_consistency(
    people: people,
    people_index: people_index,
    primary_person_id: historical_primary_person_id
  )
  age_consistency, observed_age_range = face_age_consistency(
    people: people,
    people_index: people_index,
    primary_person_id: historical_primary_person_id
  )

  # Additive confidence model: start from a weak prior, add for each
  # positive signal, subtract for contradicting signals, then clamp.
  confidence = 0.32
  confidence += 0.25 if primary_person_present
  confidence += 0.22 if recurring_primary_person
  confidence += 0.12 if profile_username_match
  confidence += 0.09 if topic_overlap.any?
  confidence += 0.06 if gender_consistency == "consistent"
  confidence += 0.06 if age_consistency == "consistent"
  confidence -= 0.18 if !primary_person_present && people.any?
  confidence -= 0.12 if external_reference_detected && !profile_username_match
  confidence = confidence.clamp(0.05, 0.98).round(2)

  owner_likelihood = if confidence >= 0.68
    "high"
  elsif confidence >= 0.45
    "medium"
  else
    "low"
  end

  reason_codes = []
  reason_codes << "primary_face_role_detected" if primary_person_present
  reason_codes << "historical_primary_person_match" if recurring_primary_person
  reason_codes << "profile_username_reference_detected" if profile_username_match
  reason_codes << "bio_topic_overlap_detected" if topic_overlap.any?
  reason_codes << "external_user_reference_detected" if external_reference_detected
  reason_codes << "gender_consistency_#{gender_consistency}" if gender_consistency != "unknown"
  reason_codes << "age_consistency_#{age_consistency}" if age_consistency != "unknown"

  {
    owner_likelihood: owner_likelihood,
    confidence: confidence,
    primary_person_present: primary_person_present,
    recurring_primary_person: recurring_primary_person,
    profile_username_match: profile_username_match,
    external_reference_detected: external_reference_detected,
    bio_topic_overlap: topic_overlap,
    observed_gender: observed_gender,
    observed_age_range: observed_age_range,
    gender_consistency: gender_consistency,
    age_consistency: age_consistency,
    reason_codes: reason_codes.uniq.first(12)
  }
end
-
-
# Compares observed face gender cues against the primary person's recorded cue.
# Returns [consistency_label, first_observed_gender].
def face_gender_consistency(people:, people_index:, primary_person_id:)
  seen = Array(people).filter_map { |row| row[:gender].to_s.downcase.presence }
  first_seen = seen.first

  expected_cue = nil
  if primary_person_id.present?
    primary = people_index[primary_person_id]
    expected_cue = primary&.metadata&.dig("face_attributes", "primary_gender_cue").to_s.downcase.presence
  end

  return [ "unknown", first_seen ] if expected_cue.blank? || seen.empty?

  seen.include?(expected_cue) ? [ "consistent", first_seen ] : [ "inconsistent", first_seen ]
end
-
-
# Compares observed face age ranges against the primary person's recorded range.
# Returns [consistency_label, first_observed_range].
def face_age_consistency(people:, people_index:, primary_person_id:)
  seen = Array(people).filter_map { |row| row[:age_range].to_s.presence }
  first_seen = seen.first

  expected_range = nil
  if primary_person_id.present?
    primary = people_index[primary_person_id]
    expected_range = primary&.metadata&.dig("face_attributes", "primary_age_range").to_s.presence
  end

  return [ "unknown", first_seen ] if expected_range.blank? || seen.empty?

  seen.include?(expected_range) ? [ "consistent", first_seen ] : [ "inconsistent", first_seen ]
end
-
-
# Normalizes raw handle values into "#{prefix}name" form, dropping blanks and duplicates.
def normalize_handle_array(values, prefix:)
  Array(values).filter_map do |raw|
    candidate = normalize_text(raw, max: 64)
    next if candidate.blank?

    stripped = candidate.delete_prefix(prefix).downcase
    next if stripped.blank?

    "#{prefix}#{stripped}"
  end.uniq
end
-
-
# Gathers candidate usernames from mentions, handles, source-profile refs, and
# free text (OCR/transcript/caption/URLs); returns normalized unique values.
def detect_usernames(mentions:, profile_handles:, ocr_text:, transcript:, metadata:)
  candidates = []
  candidates.concat(Array(mentions).map { |value| value.to_s.delete_prefix("@") })
  candidates.concat(Array(profile_handles))
  candidates.concat(extract_source_profile_references(metadata: metadata))

  texts = [ocr_text, transcript, metadata["caption"], metadata["story_ref"], metadata["story_url"], metadata["permalink"]]
  texts.each do |raw|
    body = raw.to_s
    next if body.blank?

    body.scan(OCR_USERNAME_REGEX) { |match| candidates << match.first.to_s }
    body.scan(BARE_USERNAME_REGEX) do |match|
      token = match.first.to_s
      candidates << token if username_like_token?(token)
    end
  end

  candidates.map { |value| normalize_username(value) }.reject(&:blank?).uniq
end
-
-
# Finds reshare phrasing (e.g. "reposted from") across OCR text, transcript,
# and metadata fields; returns the unique lowercased matches.
def detect_reshare_indicators(ocr_text:, transcript:, metadata:)
  haystack = [ocr_text, transcript, metadata["caption"], metadata["story_url"], metadata["permalink"]]
    .map(&:to_s)
    .join("\n")
  return [] if haystack.blank?

  RESHARE_PATTERNS.filter_map { |pattern| haystack.match(pattern)&.to_s&.downcase }.uniq
end
-
-
# Returns true when a story/permalink URL points at an Instagram profile other
# than +profile_username+; false when there is no usable link evidence.
#
# Fix: the old own-profile check used include?("/#{username}/"), which required
# a trailing slash — links ending in ".../username" or ".../username?x=1" were
# wrongly classified as third-party. Matching is now an anchored regex.
def third_party_profile_link_detected?(profile_username:, metadata:)
  username = profile_username.to_s.downcase.strip
  return false if username.empty?

  links = [metadata["story_url"], metadata["permalink"]].map(&:to_s).reject { |link| link.strip.empty? }
  return false if links.empty?

  own_profile = %r{instagram\.com/#{Regexp.escape(username)}(/|\z|\?|#)}
  any_profile = %r{instagram\.com/[a-z0-9._]+/?}

  links.any? do |link|
    next false unless link.include?("instagram.com/")

    normalized = link.downcase
    next false if normalized.match?(own_profile)

    normalized.match?(any_profile)
  end
end
-
-
# Detects meme-style content: known meme phrases plus dense multi-line overlay text.
def detect_meme_markers(ocr_text:, transcript:, metadata:)
  haystack = [ocr_text, transcript, metadata["caption"]].map(&:to_s).join("\n")
  hits = MEME_PATTERNS.filter_map { |pattern| haystack.match(pattern)&.to_s&.downcase }

  overlay_lines = haystack.lines.map(&:strip).reject(&:blank?)
  hits << "multi_line_overlay_text" if overlay_lines.length >= 2 && haystack.length >= 40

  hits.uniq
end
-
-
# Classifies content ownership from the detected evidence, strongest claim first:
# owned -> reshared -> third_party -> unknown.
def infer_share_status(profile_username_detected:, external_usernames:, external_source_refs:, reshare_hits:, meme_markers:)
  no_external_evidence = external_usernames.empty? && external_source_refs.empty?

  if profile_username_detected && no_external_evidence
    "owned"
  elsif reshare_hits.any? || meme_markers.any?
    "reshared"
  elsif !no_external_evidence
    "third_party"
  else
    "unknown"
  end
end
-
-
# Pulls candidate source-profile usernames out of the story ref and any
# Instagram URLs in metadata, then normalizes and validates them.
def extract_source_profile_references(metadata:)
  candidates = []
  story_ref = metadata["story_ref"].to_s
  candidates << story_ref.delete_suffix(":") if story_ref.present?

  [metadata["story_url"], metadata["permalink"], metadata["media_url"]].each do |raw|
    link = raw.to_s
    next if link.blank?

    story_match = link.match(%r{instagram\.com/stories/([a-zA-Z0-9._]+)/?}i)
    candidates << story_match[1] if story_match

    profile_match = link.match(%r{instagram\.com/([a-zA-Z0-9._]+)/?}i)
    if profile_match
      segment = profile_match[1].to_s.downcase
      # First path segment may be a route ("p", "reel", ...) rather than a user.
      candidates << segment unless RESERVED_IG_SEGMENTS.include?(segment)
    end
  end

  candidates
    .map { |value| normalize_username(value) }
    .reject(&:blank?)
    .select { |value| valid_instagram_username?(value) }
    .uniq
end
-
-
# Collects numeric profile/user id candidates from known metadata keys and any
# long digit runs embedded in the story id (max 10).
def extract_source_profile_ids(metadata:)
  ids = %w[source_profile_id owner_id profile_id user_id source_user_id].filter_map do |key|
    raw = metadata[key].to_s
    raw if raw.match?(/\A\d+\z/)
  end

  metadata["story_id"].to_s.scan(/(?<!\w)\d{5,}(?!\w)/).each { |token| ids << token }

  ids.map(&:to_s).reject(&:blank?).uniq.first(10)
end
-
-
# Counts how many independent verified signal families are present (0..9):
# text-bearing signals weigh 2, contextual signals weigh 1.
def score_verified_signals(ocr_text:, transcript:, objects:, object_detections:, scenes:, hashtags:, mentions:, faces:)
  has_people = faces[:primary_user_count].to_i.positive? || faces[:secondary_person_count].to_i.positive?

  [
    [2, ocr_text.to_s.present?],
    [2, transcript.to_s.present?],
    [2, objects.any? || object_detections.any?],
    [1, scenes.any?],
    [1, hashtags.any? || mentions.any?],
    [1, has_people]
  ].sum { |weight, present| present ? weight : 0 }
end
-
-
# Confidence score for an ownership label: a per-label base, nudged up by
# supporting reason codes and a strong signal score, clamped to [0.5, 0.98].
def ownership_confidence(label:, reason_codes:, signal_score:)
  base =
    case label.to_s
    when "owned_by_profile" then 0.62
    when "insufficient_evidence" then 0.9
    when "meme_reshare" then 0.9
    when "reshare" then 0.86
    when "third_party_content" then 0.82
    when "unrelated_post" then 0.76
    else 0.6
    end

  adjusted = base + (0.03 * reason_codes.size)
  adjusted += 0.02 if signal_score >= 4
  adjusted.clamp(0.5, 0.98).round(2)
end
-
-
# Builds the human-readable explanation for an assigned ownership label,
# embedding the strongest supporting hints where available.
def ownership_summary(label:, external_usernames:, external_source_refs:, reshare_hits:, meme_markers:, primary_faces:, secondary_faces:, signal_score:)
  case label.to_s
  when "owned_by_profile"
    "Validated as likely profile-owned content (signal score #{signal_score})."
  when "insufficient_evidence"
    "Insufficient verified context (signal score #{signal_score}) to generate a grounded comment."
  when "meme_reshare"
    hints = (meme_markers.first(2) + reshare_hits.first(2)).uniq.join(", ")
    suffix = hints.present? ? " (#{hints})" : ""
    "Likely meme/reshared content#{suffix}; excluded from comment generation."
  when "reshare"
    hints = reshare_hits.first(3).join(", ")
    suffix = hints.present? ? " (#{hints})" : ""
    "Likely reshare/credited content#{suffix}; skipping full comment."
  when "third_party_content"
    fragments = []
    usernames = external_usernames.first(3).join(", ")
    refs = external_source_refs.first(3).join(", ")
    fragments << "account references #{usernames}" if usernames.present?
    fragments << "source refs #{refs}" if refs.present?
    detail = fragments.any? ? " (#{fragments.join('; ')})" : ""
    "Detected third-party content#{detail} with non-primary ownership signals."
  when "unrelated_post"
    "Detected non-primary face signals (primary=#{primary_faces}, secondary=#{secondary_faces}); post may be unrelated."
  else
    "Ownership could not be validated."
  end
end
-
-
# Tokenizes the profile bio into up to 30 unique lowercase topic words (3+ chars).
def extract_profile_bio_topics
  bio = @profile&.respond_to?(:bio) ? @profile.bio.to_s.downcase : ""
  return [] if bio.blank?

  tokens = bio.scan(/[a-z0-9_]+/)
  tokens.reject { |token| token.length < 3 }.uniq.first(30)
end
-
-
# Collapses whitespace and truncates a value to at most +max+ bytes.
# Returns nil for blank input; returns the full text when its character
# length already fits within +max+.
#
# Fix: byteslice(0, max) could cut a multibyte character mid-sequence,
# returning a string with invalid encoding; the truncation now backs off
# to the nearest valid byte boundary.
def normalize_text(value, max:)
  text = value.to_s.gsub(/\s+/, " ").strip
  return nil if text.empty?
  return text if text.length <= max

  truncated = text.byteslice(0, max)
  truncated = truncated.byteslice(0, truncated.bytesize - 1) until truncated.valid_encoding?
  truncated
end
-
-
# Lowercases, trims, and strips a leading "@" from a username value.
def normalize_username(value)
  cleaned = value.to_s.downcase.strip
  cleaned.delete_prefix("@")
end
-
-
# A bare token only counts as a username when it is a valid Instagram username
# AND contains "_" or "." — this filters out ordinary dictionary words.
def username_like_token?(token)
  value = token.to_s
  valid_instagram_username?(value) && (value.include?("_") || value.include?("."))
end
-
-
# Validates the basic Instagram username shape (3-30 chars of [a-z0-9._])
# and rejects URL fragments and reserved path segments.
def valid_instagram_username?(value)
  token = value.to_s.downcase.strip
  return false unless token.length.between?(3, 30)
  return false unless token.match?(/\A[a-z0-9._]+\z/)

  !token.include?("instagram.com") && !token.start_with?("www.") && !RESERVED_IG_SEGMENTS.include?(token)
end
-
end
-
end
-
class FaceDetectionService
-
DEFAULT_MIN_FACE_CONFIDENCE = ENV.fetch("FACE_DETECTION_MIN_CONFIDENCE", "0.25").to_f
-
FACE_DUPLICATE_IOU_THRESHOLD = ENV.fetch("FACE_DETECTION_DUPLICATE_IOU_THRESHOLD", "0.85").to_f
-
-
# Builds the service with an optional injected vision client and a
# non-negative confidence floor (falls back to the env default on bad input).
def initialize(client: nil, min_face_confidence: nil)
  @client = client || build_local_client
  @min_face_confidence =
    begin
      threshold = min_face_confidence.nil? ? DEFAULT_MIN_FACE_CONFIDENCE : min_face_confidence.to_f
      threshold.negative? ? 0.0 : threshold
    rescue StandardError
      DEFAULT_MIN_FACE_CONFIDENCE
    end
end
-
-
# Runs face/OCR detection for the given media payload and returns the
# normalized result hash; any failure degrades to an empty result with a
# machine-readable reason.
def detect(media_payload:)
  image = media_payload[:image_bytes]
  return empty_result(reason: "image_bytes_missing") if image.blank?
  return empty_result(reason: "local_client_unavailable") unless @client

  raw = @client.detect_faces_and_ocr!(
    image_bytes: image,
    usage_context: {
      workflow: "story_processing",
      story_id: media_payload[:story_id].to_s
    }
  )
  parse_response(raw)
rescue StandardError => e
  empty_result(reason: "vision_error", error_message: e.message)
end
-
-
private
-
-
# Builds the in-process AI microservice client; returns nil when construction
# fails (callers treat a nil client as "detection unavailable").
def build_local_client
  Ai::LocalMicroserviceClient.new
rescue StandardError
  nil
end
-
-
# Normalizes a raw detection response (flat, or nested under "results") into
# the service's canonical hash: faces, OCR text/blocks, location tags, content
# labels, object detections, scenes, mentions, hashtags, profile handles, and
# a metadata envelope with face-filtering statistics.
def parse_response(response)
  payload = deep_stringify(response.is_a?(Hash) ? response : {})
  # Some backends wrap everything under "results"; read both shapes throughout.
  nested = payload["results"].is_a?(Hash) ? payload["results"] : {}

  # Merge OCR text from the flat field and both block arrays (de-duped per source).
  text_from_payload = payload.dig("ocr_text").to_s
  text_from_payload_blocks = Array(payload["text"]).map { |row| row.is_a?(Hash) ? row["text"].to_s : row.to_s }.map(&:strip).reject(&:blank?).uniq.join("\n")
  text_from_nested = Array(nested["text"]).map { |row| row.is_a?(Hash) ? row["text"].to_s : row.to_s }.map(&:strip).reject(&:blank?).uniq.join("\n")
  text = [text_from_payload, text_from_payload_blocks, text_from_nested].map(&:strip).reject(&:blank?).join("\n").presence
  ocr_blocks = normalize_ocr_blocks(payload: payload, nested: nested)

  location_tags = (Array(payload.dig("location_tags")) + Array(nested.dig("location_tags"))).map(&:to_s).map(&:strip).reject(&:blank?).uniq
  # Content labels may arrive as plain strings or {label:/description:} hashes.
  content_labels = (
    Array(payload.dig("content_labels")) +
    Array(nested.dig("content_labels")) +
    Array(payload["labels"]).map { |row| row.is_a?(Hash) ? (row["label"] || row["description"]) : row } +
    Array(nested["labels"]).map { |row| row.is_a?(Hash) ? (row["label"] || row["description"]) : row }
  ).map { |value| value.to_s.downcase.strip }.reject(&:blank?).uniq
  object_detections = normalize_object_detections(payload: payload, nested: nested)
  scenes = (Array(payload.dig("scenes")) + Array(nested["scenes"])).map do |row|
    next unless row.is_a?(Hash)
    {
      timestamp: row["timestamp"] || row[:timestamp],
      type: (row["type"] || row[:type]).to_s.presence || "scene_change",
      correlation: row["correlation"] || row[:correlation]
    }.compact
  end.compact.first(80)

  # Mentions/handles/hashtags come from explicit fields plus scraping the OCR text.
  mentions = (
    Array(payload.dig("mentions")) +
    Array(nested.dig("mentions")) +
    text.to_s.scan(/@[a-zA-Z0-9._]+/)
  ).map(&:to_s).map(&:downcase).uniq
  # Bare tokens only count as handles when they contain "_" or "." (word filter).
  profile_handles = (
    Array(payload.dig("profile_handles")) +
    Array(nested.dig("profile_handles")) +
    text.to_s.scan(/\b[a-zA-Z0-9._]{3,30}\b/)
  ).map(&:to_s)
    .map(&:downcase)
    .select { |token| token.include?("_") || token.include?(".") }
    .reject { |token| token.include?("instagram.com") }
    .uniq

  hashtags = (
    Array(payload.dig("hashtags")) +
    Array(nested.dig("hashtags")) +
    text.to_s.scan(/#[a-zA-Z0-9_]+/)
  ).map(&:to_s).map(&:downcase).uniq

  # Faces: accept both local ("faces") and Vision-style ("faceAnnotations") keys.
  raw_faces = (
    Array(payload.dig("faces")) +
    Array(nested["faces"]) +
    Array(payload.dig("faceAnnotations")) +
    Array(nested.dig("faceAnnotations"))
  )
  # Pipeline: normalize -> confidence/bbox filter -> IoU de-duplication.
  normalized_faces = raw_faces.map { |face| normalize_face(face) }
  filtered_faces = normalized_faces.select { |face| keep_face?(face) }
  faces = deduplicate_faces(filtered_faces)
  warnings = Array(payload.dig("metadata", "warnings")) + Array(nested.dig("metadata", "warnings"))
  metadata_reason = payload.dig("metadata", "reason").to_s.presence || nested.dig("metadata", "reason").to_s.presence

  {
    faces: faces,
    ocr_text: text.presence,
    ocr_blocks: ocr_blocks,
    location_tags: location_tags.first(20),
    content_signals: content_labels.first(30),
    object_detections: object_detections.first(60),
    scenes: scenes,
    mentions: mentions.first(30),
    hashtags: hashtags.first(30),
    profile_handles: profile_handles.first(30),
    metadata: {
      source: payload.dig("metadata", "source").to_s.presence || nested.dig("metadata", "source").to_s.presence || "local_ai",
      face_count: faces.length,
      detected_face_count: raw_faces.length,
      filtered_face_count: filtered_faces.length,
      dropped_face_count: [ raw_faces.length - faces.length, 0 ].max,
      min_face_confidence: @min_face_confidence,
      reason: metadata_reason,
      warnings: warnings.first(20)
    }.compact
  }
end
-
-
# Converts one raw face row into the canonical face hash: confidence, bbox,
# up to 12 landmarks, likelihoods, and age/gender attributes.
def normalize_face(face)
  row = deep_stringify(face.is_a?(Hash) ? face : {})
  box = row.dig("bounding_box") || row.dig("bbox") || row.dig("boundingPoly", "vertices")
  age = row["age"].to_f
  gender = row["gender"].to_s.strip.downcase
  gender = nil if gender.blank?

  landmarks = Array(row.dig("landmarks") || []).first(12).filter_map do |item|
    point = deep_stringify(item)
    next unless point.is_a?(Hash)

    {
      type: (point.dig("type") || point.dig("name") || "UNKNOWN").to_s,
      x: point.dig("x") || point.dig("position", "x"),
      y: point.dig("y") || point.dig("position", "y"),
      z: point.dig("z") || point.dig("position", "z")
    }
  end

  {
    confidence: (row["confidence"] || row["score"] || 0).to_f,
    bounding_box: normalize_bounding_box(box),
    landmarks: landmarks,
    likelihoods: row.dig("likelihoods") || {},
    age: age.positive? ? age.round(1) : nil,
    age_range: age.positive? ? age_range_for(age) : nil,
    gender: gender,
    gender_score: row["gender_score"].to_f
  }
end
-
-
# Buckets a numeric age into a coarse named range.
def age_range_for(age_value)
  case age_value.to_i
  when ...13 then "child"
  when 13...20 then "teen"
  when 20...30 then "young_adult"
  when 30...45 then "adult"
  when 45...60 then "middle_aged"
  else "senior"
  end
end
-
-
# Converts the various bbox shapes emitted by vision backends into a canonical
# { "x1", "y1", "x2", "y2" } hash of Floats. Unrecognized input yields {}.
# Accepted shapes: corner hash, x/y/width/height hash, flat [x1,y1,x2,y2],
# 4 vertex hashes, or 4 [x, y] pairs.
#
# Fix: the flat numeric form previously kept the raw values (possibly
# Integers) while every other branch coerces to Float; it now coerces too,
# so downstream comparisons always see a uniform value type.
def normalize_bounding_box(value)
  case value
  when Hash
    row = deep_stringify(value)
    if row.key?("x1") && row.key?("y1") && row.key?("x2") && row.key?("y2")
      { "x1" => row["x1"].to_f, "y1" => row["y1"].to_f, "x2" => row["x2"].to_f, "y2" => row["y2"].to_f }
    elsif row.key?("x") && row.key?("y") && row.key?("width") && row.key?("height")
      x = row["x"].to_f
      y = row["y"].to_f
      { "x1" => x, "y1" => y, "x2" => x + row["width"].to_f, "y2" => y + row["height"].to_f }
    else
      {}
    end
  when Array
    return {} unless value.length == 4

    case value.first
    when Numeric
      { "x1" => value[0].to_f, "y1" => value[1].to_f, "x2" => value[2].to_f, "y2" => value[3].to_f }
    when Hash
      xs = value.map { |pt| pt["x"].to_f }
      ys = value.map { |pt| pt["y"].to_f }
      { "x1" => xs.min, "y1" => ys.min, "x2" => xs.max, "y2" => ys.max }
    when Array
      xs = value.map { |pt| pt[0].to_f }
      ys = value.map { |pt| pt[1].to_f }
      { "x1" => xs.min, "y1" => ys.min, "x2" => xs.max, "y2" => ys.max }
    else
      {}
    end
  else
    {}
  end
end
-
-
# Builds the canonical "no detection" result with a machine-readable reason
# and an optional error message in the metadata envelope.
def empty_result(reason:, error_message: nil)
  metadata = {
    source: "local_ai",
    reason: reason,
    error_message: error_message.to_s.presence
  }.compact

  {
    faces: [],
    ocr_text: nil,
    ocr_blocks: [],
    location_tags: [],
    content_signals: [],
    object_detections: [],
    scenes: [],
    mentions: [],
    hashtags: [],
    profile_handles: [],
    metadata: metadata
  }
end
-
-
# Normalizes OCR text blocks from the dedicated "ocr_blocks" array or, when
# that is empty, from the legacy "text" arrays (flat and nested). Returns at
# most 80 entries.
#
# Fix: the hash-row normalization was duplicated verbatim across the two
# branches; it is now a single shared helper.
def normalize_ocr_blocks(payload:, nested:)
  blocks = Array(payload.dig("ocr_blocks")).filter_map { |row| ocr_block_from_hash(row) }

  if blocks.empty?
    (Array(payload["text"]) + Array(nested["text"])).each do |row|
      if row.is_a?(Hash)
        block = ocr_block_from_hash(row)
        blocks << block if block
      else
        text = row.to_s.strip
        next if text.blank?

        blocks << { text: text, confidence: 0.0, bbox: {}, source: "ocr" }
      end
    end
  end

  blocks.first(80)
end

# Builds one normalized OCR block from a hash row; nil when its text is blank.
def ocr_block_from_hash(row)
  return nil unless row.is_a?(Hash)

  text = row["text"].to_s.strip
  return nil if text.blank?

  {
    text: text,
    confidence: row["confidence"].to_f,
    bbox: normalize_bounding_box(row["bbox"]),
    timestamp: row["timestamp"],
    source: row["source"].to_s.presence || "ocr"
  }.compact
end
-
-
# Normalizes object detections, falling back to the label lists when the
# payload carries no dedicated "object_detections" array (max 80 rows).
def normalize_object_detections(payload:, nested:)
  source_rows = Array(payload.dig("object_detections"))
  source_rows = Array(payload["labels"]) if source_rows.empty?
  source_rows = Array(nested["labels"]) if source_rows.empty?

  source_rows.filter_map do |raw|
    entry = deep_stringify(raw)
    is_hash = entry.is_a?(Hash)
    label = (is_hash ? (entry["label"] || entry["description"]) : entry).to_s.strip
    next if label.blank?

    {
      label: label.downcase,
      confidence: is_hash ? (entry["confidence"] || entry["score"] || entry["max_confidence"]).to_f : 0.0,
      bbox: is_hash ? normalize_bounding_box(entry["bbox"]) : {},
      timestamps: is_hash ? Array(entry["timestamps"]).map(&:to_f).first(80) : []
    }.compact
  end.first(80)
end
-
-
# A face is kept when it has a usable bounding box and a strictly positive
# confidence that clears the configured floor.
def keep_face?(face)
  return false unless face.is_a?(Hash) && valid_bounding_box?(face[:bounding_box])

  score = face[:confidence].to_f
  score.positive? && score >= @min_face_confidence
end
-
-
# True when bbox is a non-empty Hash describing a strictly positive-area box.
#
# Fix: the old version re-derived width/height and re-checked their positivity
# after `x2 > x1 && y2 > y1` had already guaranteed it; the redundant dead
# second check is removed.
def valid_bounding_box?(bbox)
  return false unless bbox.is_a?(Hash) && !bbox.empty?

  bbox["x2"].to_f > bbox["x1"].to_f && bbox["y2"].to_f > bbox["y1"].to_f
end
-
-
# Greedy de-duplication: highest-confidence (then largest) faces are accepted
# first; later faces overlapping an accepted one past the IoU threshold drop.
def deduplicate_faces(faces)
  kept = []
  ordered = Array(faces).sort_by { |face| [ -face[:confidence].to_f, -bounding_box_area(face[:bounding_box]) ] }

  ordered.each do |face|
    overlaps = kept.any? do |other|
      bounding_box_iou(other[:bounding_box], face[:bounding_box]) >= FACE_DUPLICATE_IOU_THRESHOLD
    end
    kept << face unless overlaps
  end

  kept
end
-
-
# Area of a normalized bbox hash; 0.0 for missing or degenerate boxes.
def bounding_box_area(bbox)
  return 0.0 unless bbox.is_a?(Hash) && !bbox.empty?

  width = bbox["x2"].to_f - bbox["x1"].to_f
  height = bbox["y2"].to_f - bbox["y1"].to_f
  width.positive? && height.positive? ? width * height : 0.0
end
-
-
# Intersection-over-union of two normalized bbox hashes; 0.0 when either box
# is missing, they are disjoint, or the union is degenerate.
def bounding_box_iou(left_bbox, right_bbox)
  return 0.0 unless left_bbox.is_a?(Hash) && right_bbox.is_a?(Hash)
  return 0.0 if left_bbox.empty? || right_bbox.empty?

  overlap_w = [ left_bbox["x2"].to_f, right_bbox["x2"].to_f ].min - [ left_bbox["x1"].to_f, right_bbox["x1"].to_f ].max
  overlap_h = [ left_bbox["y2"].to_f, right_bbox["y2"].to_f ].min - [ left_bbox["y1"].to_f, right_bbox["y1"].to_f ].max
  return 0.0 unless overlap_w.positive? && overlap_h.positive?

  intersection = overlap_w * overlap_h
  union = bounding_box_area(left_bbox) + bounding_box_area(right_bbox) - intersection
  union.positive? ? intersection / union : 0.0
end
-
-
# Recursively converts Hash keys (at any depth, including inside Arrays) to
# Strings; all other values pass through unchanged.
def deep_stringify(value)
  if value.is_a?(Hash)
    value.to_h { |key, child| [ key.to_s, deep_stringify(child) ] }
  elsif value.is_a?(Array)
    value.map { |child| deep_stringify(child) }
  else
    value
  end
end
-
end
-
require "base64"
-
require "digest"
-
require "json"
-
require "net/http"
-
-
class FaceEmbeddingService
-
DEFAULT_DIMENSION = 512
-
REQUEST_TIMEOUT_SECONDS = 8
-
-
# Configures the embedding backend URL (blank disables the HTTP path) and the
# output vector size (non-positive values fall back to DEFAULT_DIMENSION).
def initialize(service_url: ENV["FACE_EMBEDDING_SERVICE_URL"], dimension: DEFAULT_DIMENSION)
  @service_url = service_url.to_s.strip
  requested = dimension.to_i
  @dimension = requested.positive? ? requested : DEFAULT_DIMENSION
end
-
-
# Returns { vector:, version: } for a face: the external service is preferred
# when configured; otherwise (or on failure) a deterministic local embedding
# is produced. The vector is always L2-normalized.
def embed(media_payload:, face:)
  external = @service_url.present? ? fetch_external_embedding(media_payload: media_payload, face: face) : nil

  if external.present?
    { vector: normalize(external), version: "external_service_v1" }
  else
    fallback = deterministic_embedding(media_payload: media_payload, face: face)
    { vector: normalize(fallback), version: "deterministic_v1" }
  end
end
-
-
private
-
-
# POSTs the image (base64) plus bounding box to the configured embedding
# service and returns the embedding as an Array of Floats; nil on any HTTP,
# timeout, or parse failure (callers then fall back to the local embedding).
def fetch_external_embedding(media_payload:, face:)
  uri = URI.parse(@service_url)
  request = Net::HTTP::Post.new(uri.request_uri)
  request["Content-Type"] = "application/json"
  request["Accept"] = "application/json"
  request.body = JSON.generate(
    image_base64: Base64.strict_encode64(media_payload[:image_bytes].to_s),
    bounding_box: face[:bounding_box],
    story_id: media_payload[:story_id].to_s
  )

  connection = Net::HTTP.new(uri.host, uri.port)
  connection.use_ssl = (uri.scheme == "https")
  connection.open_timeout = REQUEST_TIMEOUT_SECONDS
  connection.read_timeout = REQUEST_TIMEOUT_SECONDS

  response = connection.request(request)
  return nil unless response.is_a?(Net::HTTPSuccess)

  embedding = JSON.parse(response.body.to_s)["embedding"]
  embedding.is_a?(Array) && embedding.any? ? embedding.map(&:to_f) : nil
rescue StandardError
  nil
end
-
-
def deterministic_embedding(media_payload:, face:)
-
seed = [
-
media_payload[:story_id].to_s,
-
face[:bounding_box].to_h.to_json,
-
Digest::SHA256.hexdigest(media_payload[:image_bytes].to_s.byteslice(0, 8192))
-
].join(":")
-
-
out = []
-
i = 0
-
while out.length < @dimension
-
digest = Digest::SHA256.digest("#{seed}:#{i}")
-
digest.bytes.each do |byte|
-
out << ((byte.to_f / 127.5) - 1.0)
-
break if out.length >= @dimension
-
end
-
i += 1
-
end
-
out
-
end
-
-
# L2-normalizes a numeric vector, rounding components to 8 decimal places.
# Zero-magnitude vectors are returned unchanged; non-arrays become [].
def normalize(vector)
  components = Array(vector).map(&:to_f)
  return [] if components.empty?

  magnitude = Math.sqrt(components.sum { |component| component * component })
  return components unless magnitude.positive?

  components.map { |component| (component / magnitude).round(8) }
end
-
end
-
1
class FaceIdentityResolutionService
-
1
MIN_PRIMARY_APPEARANCES = 3
-
1
MIN_PRIMARY_RATIO = 0.60
-
1
FREQUENT_COLLABORATOR_CO_APPEARANCES = 3
-
1
VERY_FREQUENT_COLLABORATOR_CO_APPEARANCES = 6
-
-
RESERVED_USERNAMES = %w[
-
1
instagram
-
stories
-
p
-
reel
-
reels
-
tv
-
explore
-
accounts
-
direct
-
www
-
com
-
].freeze
-
-
1
# Resolves face identities for a post; returns a skip marker when the post is
# missing/unsaved or has no associated profile.
def resolve_for_post!(post:, extracted_usernames: [], content_summary: {})
  return { skipped: true, reason: "post_missing" } unless post&.persisted?

  profile = post.instagram_profile
  return { skipped: true, reason: "profile_missing" } unless profile

  resolve_for_source!(
    source: post,
    source_type: "post",
    profile: profile,
    account: post.instagram_account,
    extracted_usernames: extracted_usernames,
    content_summary: content_summary
  )
end
-
-
1
# Resolves face identities for a story; returns a skip marker when the story
# is missing/unsaved or has no associated profile.
def resolve_for_story!(story:, extracted_usernames: [], content_summary: {})
  return { skipped: true, reason: "story_missing" } unless story&.persisted?

  profile = story.instagram_profile
  return { skipped: true, reason: "profile_missing" } unless profile

  resolve_for_source!(
    source: story,
    source_type: "story",
    profile: profile,
    account: story.instagram_account,
    extracted_usernames: extracted_usernames,
    content_summary: content_summary
  )
end
-
-
1
private
-
-
1
# Orchestrates identity resolution for one post/story:
#   1. load face rows and gather candidate usernames,
#   2. link usernames to participants and (re)promote the primary identity,
#   3. rebuild collaborator relationships and the per-source summary,
#   4. persist the profile-level identity and the source summary.
# Any error degrades to a { skipped: true, reason:, error: } result.
# NOTE(review): the account: parameter is currently unused in this body;
# it appears to be kept for interface stability — confirm before removing.
def resolve_for_source!(source:, source_type:, profile:, account:, extracted_usernames:, content_summary:)
  faces = source_faces(source: source, source_type: source_type)
  usernames = collect_usernames(
    profile: profile,
    source: source,
    extracted_usernames: extracted_usernames,
    content_summary: content_summary
  )

  participants, unknown_face_count = build_participants(faces)
  apply_username_links!(participants: participants, usernames: usernames, profile: profile)

  stats = profile_face_stats(profile: profile)
  primary_identity = promote_primary_identity!(profile: profile, stats: stats)
  # Promotion may have re-roled people; reload and re-link before summarizing.
  participants = refresh_participants_with_latest_people(participants: participants, profile: profile)
  apply_username_links!(participants: participants, usernames: usernames, profile: profile)
  sync_source_face_roles!(source: source, source_type: source_type)

  collaborator_index = build_collaborator_index(profile: profile, primary_person_id: primary_identity[:person_id])
  update_collaborator_relationships!(profile: profile, collaborator_index: collaborator_index)

  username_matches = map_usernames_to_people(profile: profile, usernames: usernames)
  participants = enrich_participants(
    participants: participants,
    stats: stats,
    collaborator_index: collaborator_index
  )
  # Drop the live person objects before serializing the summary payload.
  participants_payload = participants.map { |row| row.except(:person) }

  summary_text = build_summary_text(
    profile: profile,
    participants: participants_payload,
    primary_identity: primary_identity,
    usernames: usernames,
    unknown_face_count: unknown_face_count
  )

  summary = {
    source_type: source_type,
    source_id: source.id,
    extracted_usernames: usernames,
    unknown_face_count: unknown_face_count,
    participants: participants_payload,
    primary_identity: primary_identity,
    username_face_matches: username_matches,
    participant_summary_text: summary_text,
    resolved_at: Time.current.iso8601
  }

  persist_profile_face_identity!(
    profile: profile,
    primary_identity: primary_identity,
    collaborator_index: collaborator_index,
    username_matches: username_matches
  )
  persist_source_summary!(source: source, source_type: source_type, summary: summary)

  { skipped: false, summary: summary }
rescue StandardError => e
  # Resolution is best-effort: report the failure instead of raising.
  {
    skipped: true,
    reason: "face_identity_resolution_error",
    error: e.message.to_s
  }
end
-
-
1
# Loads the face rows (with their linked person preloaded) for the source.
def source_faces(source:, source_type:)
  case source_type
  when "post" then source.instagram_post_faces.includes(:instagram_story_person).to_a
  when "story" then source.instagram_story_faces.includes(:instagram_story_person).to_a
  else []
  end
end
-
-
1
# Aggregates username candidates from the explicit extraction, the content
# summary (mentions/handles/OCR), source metadata and URLs, and the profile's
# own handle; normalized, de-duplicated, capped at 40.
def collect_usernames(profile:, source:, extracted_usernames:, content_summary:)
  candidates = Array(extracted_usernames).dup

  summary = content_summary.is_a?(Hash) ? content_summary : {}
  candidates.concat(Array(summary[:mentions] || summary["mentions"]))
  candidates.concat(Array(summary[:profile_handles] || summary["profile_handles"]))
  candidates.concat(extract_usernames_from_text(summary[:ocr_text] || summary["ocr_text"]))

  metadata = source.metadata.is_a?(Hash) ? source.metadata : {}
  candidates.concat(Array(metadata["mentions"]))
  candidates.concat(Array(metadata["profile_handles"]))
  candidates.concat(extract_usernames_from_text(metadata["ocr_text"]))
  candidates.concat(extract_usernames_from_url(metadata["story_url"]))
  candidates.concat(extract_usernames_from_url(metadata["permalink"]))
  candidates << metadata["story_ref"].to_s.delete_suffix(":") if metadata["story_ref"].to_s.present?

  own = normalize_username(profile.username)
  candidates << own if own.present?

  candidates
    .map { |value| normalize_username(value) }
    .reject(&:blank?)
    .uniq
    .first(40)
end
-
-
1
# Extracts @-mentions plus bare username-looking tokens from free text.
def extract_usernames_from_text(text)
  body = text.to_s
  return [] if body.blank?

  found = body.scan(/@([a-zA-Z0-9._]{2,30})/).map(&:first)
  body.scan(/\b([a-zA-Z0-9._]{3,30})\b/) do |match|
    candidate = match.first.to_s
    found << candidate if username_like_token?(candidate)
  end
  found
end
-
-
1
# Extracts usernames from Instagram story/profile URLs, skipping reserved
# path segments that are routes rather than user handles.
def extract_usernames_from_url(url)
  link = url.to_s
  return [] if link.blank?

  found = []
  story_match = link.match(%r{instagram\.com/stories/([a-zA-Z0-9._]+)/?}i)
  found << story_match[1] if story_match

  profile_match = link.match(%r{instagram\.com/([a-zA-Z0-9._]+)/?}i)
  if profile_match
    segment = profile_match[1].to_s
    found << segment unless RESERVED_USERNAMES.include?(segment.downcase)
  end
  found
end
-
-
1
# Maps face rows onto participant hashes (person + identity attributes) and
# counts the faces with no linked person. Returns [participants, unknown_count].
def build_participants(faces)
  unmatched = 0
  rows = Array(faces).filter_map do |face|
    person = face.instagram_story_person
    if person.nil?
      unmatched += 1
      next
    end

    {
      person: person,
      person_id: person.id,
      role: person.role.to_s,
      label: person.label.to_s.presence,
      match_similarity: face.match_similarity.to_f,
      detector_confidence: face.detector_confidence.to_f,
      linked_usernames: linked_usernames(person),
      real_person_status: person.real_person_status,
      identity_confidence: person.identity_confidence,
      merged_into_person_id: person.merged_into_person_id
    }
  end

  [ rows, unmatched ]
end
-
-
1
# Links extracted usernames to participant people: the profile's own username
# goes to the primary participant; each external username goes to a known
# alias match or, when there is exactly one non-primary participant, to them.
def apply_username_links!(participants:, usernames:, profile:)
  return if participants.empty? || usernames.empty?

  own_username = normalize_username(profile.username)
  primary = participants.find { |row| row[:role] == "primary_user" }
  update_person_usernames!(primary[:person], [ own_username ]) if primary && own_username.present?

  external = usernames.reject { |value| value == own_username }
  return if external.empty?

  by_person_id = participants.index_by { |row| row[:person_id] }
  alias_map = username_alias_index(profile: profile)

  external.each do |username|
    aliased_id = alias_map[username]
    if aliased_id && by_person_id[aliased_id]
      update_person_usernames!(by_person_id[aliased_id][:person], [ username ])
      next
    end

    # Only attribute an unmatched username when exactly one non-primary person
    # is present — anything else would be a guess.
    others = participants.reject { |row| row[:role] == "primary_user" }
    next unless others.length == 1

    update_person_usernames!(others.first[:person], [ username ])
  end
end
-
-
1
# Merges newly observed usernames into the person's metadata (capped at 30),
# bumping per-username observation counters; no-op when nothing new appears.
def update_person_usernames!(person, usernames)
  incoming = Array(usernames).map { |value| normalize_username(value) }.reject(&:blank?).uniq
  return if incoming.empty?

  meta = person.metadata.is_a?(Hash) ? person.metadata.deep_dup : {}
  existing = Array(meta["linked_usernames"]).map { |value| normalize_username(value) }.reject(&:blank?)
  merged = (existing + incoming).uniq.first(30)
  return if merged == existing

  counts = meta["username_observations"].is_a?(Hash) ? meta["username_observations"].deep_dup : {}
  incoming.each { |username| counts[username] = counts[username].to_i + 1 }

  meta["linked_usernames"] = merged
  meta["username_observations"] = counts
  meta["last_username_linked_at"] = Time.current.iso8601
  # update_columns intentionally skips validations/callbacks: metadata-only touch.
  person.update_columns(metadata: meta, updated_at: Time.current)
end
-
-
1
# Counts attributed faces per person across the profile's stories and posts.
# Returns { person_counts:, total_faces:, people_by_id: }.
def profile_face_stats(profile:)
  story_counts = InstagramStoryFace
    .joins(:instagram_story)
    .where(instagram_stories: { instagram_profile_id: profile.id })
    .where.not(instagram_story_person_id: nil)
    .group(:instagram_story_person_id)
    .count

  post_counts = InstagramPostFace
    .joins(:instagram_profile_post)
    .where(instagram_profile_posts: { instagram_profile_id: profile.id })
    .where.not(instagram_story_person_id: nil)
    .group(:instagram_story_person_id)
    .count

  combined = story_counts.merge(post_counts) { |_person_id, stories, posts| stories.to_i + posts.to_i }

  {
    person_counts: combined,
    total_faces: combined.values.sum,
    people_by_id: profile.instagram_story_people.where(id: combined.keys).index_by(&:id)
  }
end
-
-
1
# Determines (and, when confident, persists) which detected person is the
# profile owner ("primary_user"). The most-seen person is promoted when they
# meet both MIN_PRIMARY_APPEARANCES and MIN_PRIMARY_RATIO dominance.
#
# @param stats [Hash] output of profile_face_stats
# @return [Hash] primary-identity summary (see empty_primary_identity)
def promote_primary_identity!(profile:, stats:)
  counts = stats[:person_counts]
  total = stats[:total_faces].to_i
  return empty_primary_identity if counts.empty? || total <= 0

  top_person_id, top_count = counts.max_by { |_id, count| count.to_i }
  top_person = stats[:people_by_id][top_person_id]
  return empty_primary_identity unless top_person

  ratio = top_count.to_f / total.to_f
  confirmed = top_count.to_i >= MIN_PRIMARY_APPEARANCES && ratio >= MIN_PRIMARY_RATIO

  primary_person = profile.instagram_story_people.find_by(role: "primary_user")

  if confirmed
    # Demote any previously promoted person other than the new winner.
    InstagramStoryPerson.where(instagram_profile_id: profile.id, role: "primary_user").where.not(id: top_person.id).update_all(role: "secondary_person", updated_at: Time.current)

    metadata = top_person.metadata.is_a?(Hash) ? top_person.metadata.deep_dup : {}
    metadata["primary_identity"] = {
      "confirmed" => true,
      "dominance_ratio" => ratio.round(4),
      "appearance_count" => top_count.to_i,
      "updated_at" => Time.current.iso8601
    }

    top_person.update!(
      role: "primary_user",
      label: top_person.label.to_s.presence || profile.username.to_s,
      metadata: metadata
    )
    top_person.sync_identity_confidence!
    primary_person = top_person
  end

  # When unconfirmed, report the existing primary (if any) or the top candidate.
  candidate = primary_person || top_person
  {
    person_id: candidate.id,
    confirmed: confirmed,
    role: candidate.role,
    label: candidate.label.to_s.presence,
    appearance_count: counts[candidate.id].to_i,
    total_faces: total,
    dominance_ratio: (counts[candidate.id].to_f / total.to_f).round(4),
    linked_usernames: linked_usernames(candidate),
    bio_context: bio_context_tokens(profile: profile)
  }
end
# Counts, per person, how many stories/posts they co-appear in alongside the
# primary person, and labels each with a relationship tier.
#
# @return [Hash] person_id => { co_appearances_with_primary:, relationship: }
def build_collaborator_index(profile:, primary_person_id:)
  return {} if primary_person_id.blank?

  story_rows = InstagramStoryFace
    .joins(:instagram_story)
    .where(instagram_stories: { instagram_profile_id: profile.id })
    .where.not(instagram_story_person_id: nil)
    .pluck(:instagram_story_id, :instagram_story_person_id)

  post_rows = InstagramPostFace
    .joins(:instagram_profile_post)
    .where(instagram_profile_posts: { instagram_profile_id: profile.id })
    .where.not(instagram_story_person_id: nil)
    .pluck(:instagram_profile_post_id, :instagram_story_person_id)

  collaborator_counts = Hash.new(0)

  # Each media item counts at most once per co-appearing person (uniq below).
  story_rows.group_by(&:first).each_value do |rows|
    people = rows.map(&:last).uniq
    next unless people.include?(primary_person_id)
    people.reject { |person_id| person_id == primary_person_id }.each { |person_id| collaborator_counts[person_id] += 1 }
  end

  post_rows.group_by(&:first).each_value do |rows|
    people = rows.map(&:last).uniq
    next unless people.include?(primary_person_id)
    people.reject { |person_id| person_id == primary_person_id }.each { |person_id| collaborator_counts[person_id] += 1 }
  end

  collaborator_counts.transform_values do |count|
    {
      co_appearances_with_primary: count.to_i,
      relationship: relationship_for_coappearance(count.to_i)
    }
  end
end
# Writes each collaborator's relationship tier and co-appearance count into
# that person's metadata. Uses update_columns, so callbacks are skipped.
#
# @param collaborator_index [Hash] output of build_collaborator_index
def update_collaborator_relationships!(profile:, collaborator_index:)
  return if collaborator_index.empty?

  scope = profile.instagram_story_people.where(id: collaborator_index.keys)
  scope.find_each do |person|
    entry = collaborator_index[person.id] || {}
    meta = if person.metadata.is_a?(Hash)
      person.metadata.deep_dup
    else
      {}
    end
    meta["relationship"] = entry[:relationship]
    meta["co_appearances_with_primary"] = entry[:co_appearances_with_primary].to_i
    meta["relationship_updated_at"] = Time.current.iso8601
    person.update_columns(metadata: meta, updated_at: Time.current)
  end
end
# Resolves referenced usernames to known people on the profile using the
# alias index (linked usernames + labels). Usernames with no match are
# dropped (filter_map).
#
# @return [Array<Hash>] one entry per matched username with person details
def map_usernames_to_people(profile:, usernames:)
  return [] if usernames.empty?

  alias_map = username_alias_index(profile: profile)
  people = profile.instagram_story_people.where(id: alias_map.values.uniq).index_by(&:id)

  usernames.filter_map do |username|
    person_id = alias_map[username]
    next unless person_id
    person = people[person_id]
    next unless person

    {
      username: username,
      person_id: person.id,
      role: person.role,
      label: person.label.to_s.presence,
      relationship: person.metadata.is_a?(Hash) ? person.metadata["relationship"].to_s.presence : nil
    }.compact
  end
end
# Builds a lookup of normalized username alias => person id for all people
# on the profile. On collisions the first person encountered wins.
def username_alias_index(profile:)
  profile.instagram_story_people.find_each.each_with_object({}) do |person, index|
    names = linked_usernames(person)
    from_label = normalize_username(person.label)
    names << from_label if from_label.present?

    # ||= keeps the earliest mapping when two people share an alias.
    names.uniq.each { |name| index[name] ||= person.id }
  end
end
# Annotates each participant row with role, appearance counts, relationship,
# aliases, and identity status derived from face stats and the collaborator
# index, then de-duplicates near-identical detections.
#
# Fix: the merge previously recomputed counts[person.id].to_i even though it
# was already held in the `appearances` local; reuse the local.
#
# @param participants [Array<Hash>] rows each carrying a :person record
# @param stats [Hash] output of profile_face_stats
# @param collaborator_index [Hash] output of build_collaborator_index
# @return [Array<Hash>] enriched, de-duplicated participant rows
def enrich_participants(participants:, stats:, collaborator_index:)
  counts = stats[:person_counts]

  participants.map do |row|
    person = row[:person]
    collaborator = collaborator_index[person.id] || {}
    appearances = counts[person.id].to_i
    role = person.role.to_s
    row.merge(
      role: role,
      owner_match: role == "primary_user",
      recurring_face: appearances > 1,
      appearances: appearances,
      # NOTE(review): metadata&.dig assumes metadata is nil or a Hash —
      # a non-hash value would raise here; confirm upstream normalization.
      relationship: collaborator[:relationship] || person.metadata&.dig("relationship"),
      co_appearances_with_primary: collaborator[:co_appearances_with_primary].to_i,
      linked_usernames: linked_usernames(person),
      real_person_status: person.real_person_status,
      identity_confidence: person.identity_confidence,
      merged_into_person_id: person.merged_into_person_id
    )
    # Rows are duplicates when person, similarity, and confidence all match
    # to 4 decimal places; assumes those keys are always numeric.
  end.uniq { |row| [ row[:person_id], row[:match_similarity].round(4), row[:detector_confidence].round(4) ] }
end
# Reloads the person record behind each participant row and refreshes the
# fields derived from it (role, label, aliases, identity status). Rows whose
# person no longer exists are passed through unchanged.
def refresh_participants_with_latest_people(participants:, profile:)
  person_ids = participants.filter_map { |row| row[:person_id] }.uniq
  return participants if person_ids.empty?

  fresh_people = profile.instagram_story_people.where(id: person_ids).index_by(&:id)

  participants.map do |row|
    current = fresh_people[row[:person_id]]
    next row unless current

    row.merge(
      person: current,
      role: current.role.to_s,
      label: current.label.to_s.presence,
      linked_usernames: linked_usernames(current),
      real_person_status: current.real_person_status,
      identity_confidence: current.identity_confidence,
      merged_into_person_id: current.merged_into_person_id
    )
  end
end
# Builds a one-line human-readable summary of who appears in a piece of
# media: primary identity state, up to 8 participants (relationship, up to
# two aliases, appearance count), referenced usernames, and unknown faces.
#
# @return [String] summary, or a fixed fallback when nothing was found
def build_summary_text(profile:, participants:, primary_identity:, usernames:, unknown_face_count:)
  parts = []

  if primary_identity[:person_id].present?
    state = primary_identity[:confirmed] ? "confirmed" : "candidate"
    parts << "Primary identity #{state}: #{primary_identity[:label] || profile.username}"
  end

  if participants.any?
    participant_text = participants.first(8).map do |row|
      base = row[:label] || "person_#{row[:person_id]}"
      rel = row[:relationship].to_s.presence
      aliases = Array(row[:linked_usernames]).first(2)
      detail = []
      detail << rel if rel.present?
      detail << "aka #{aliases.join('/') }" if aliases.any?
      detail << "seen #{row[:appearances]}x" if row[:appearances].to_i.positive?
      detail.empty? ? base : "#{base} (#{detail.join(', ')})"
    end
    parts << "Participants: #{participant_text.join('; ')}"
  end

  parts << "Referenced usernames: #{usernames.join(', ')}" if usernames.any?
  parts << "Unknown faces: #{unknown_face_count}" if unknown_face_count.to_i.positive?

  text = parts.join(". ").strip
  text.presence || "No identifiable participants found."
end
# Persists the computed face-identity picture onto the profile's behavior
# profile record: the primary identity, related individuals (collaborators),
# and the first 20 username matches, plus versioning metadata.
def persist_profile_face_identity!(profile:, primary_identity:, collaborator_index:, username_matches:)
  record = InstagramProfileBehaviorProfile.find_or_initialize_by(instagram_profile: profile)

  summary = record.behavioral_summary.is_a?(Hash) ? record.behavioral_summary.deep_dup : {}
  summary["face_identity_profile"] = primary_identity
  # NOTE(review): find_by inside the loop is an N+1; a preloaded index_by
  # over collaborator_index.keys would avoid it.
  summary["related_individuals"] = collaborator_index.map do |person_id, row|
    person = profile.instagram_story_people.find_by(id: person_id)
    {
      person_id: person_id,
      role: person&.role,
      label: person&.label.to_s.presence,
      relationship: row[:relationship],
      co_appearances_with_primary: row[:co_appearances_with_primary].to_i,
      linked_usernames: person ? linked_usernames(person).first(6) : []
    }.compact
  end
  summary["known_username_matches"] = username_matches.first(20)

  metadata = record.metadata.is_a?(Hash) ? record.metadata.deep_dup : {}
  metadata["face_identity_updated_at"] = Time.current.iso8601
  metadata["face_identity_version"] = "v1"

  # Coerce a pre-existing score to float before save; nil is left alone.
  record.activity_score = record.activity_score.to_f if record.activity_score.present?
  record.behavioral_summary = summary
  record.metadata = metadata
  record.save!
end
# Writes the face-identity summary into the source's metadata (story or
# post), and for stories also mirrors it onto the originating source event.
# Best-effort: any error is swallowed and nil returned.
def persist_source_summary!(source:, source_type:, summary:)
  metadata = source.metadata.is_a?(Hash) ? source.metadata.deep_dup : {}
  metadata["face_identity"] = summary
  metadata["participant_summary"] = summary[:participant_summary_text].to_s
  metadata["participants"] = Array(summary[:participants]).first(12)

  source.update_columns(metadata: metadata, updated_at: Time.current)

  # Only stories carry a backing source event to mirror onto.
  return unless source_type == "story"
  return unless source.respond_to?(:source_event)

  event = source.source_event
  return unless event

  event_meta = event.metadata.is_a?(Hash) ? event.metadata.deep_dup : {}
  event_meta["face_identity"] = summary
  event_meta["participant_summary"] = summary[:participant_summary_text].to_s
  event.update_columns(metadata: event_meta, updated_at: Time.current)
rescue StandardError
  # Deliberate best-effort: persistence here must not fail the pipeline.
  nil
end
# Copies each face's person role onto the face record itself so the face's
# role column stays in sync after promotions/demotions. Best-effort: errors
# are swallowed. Unknown source types are a no-op.
# NOTE(review): the two branches are identical except for the association
# name; could be unified if both face models share an interface.
def sync_source_face_roles!(source:, source_type:)
  case source_type
  when "post"
    source.instagram_post_faces.includes(:instagram_story_person).find_each do |face|
      next unless face.instagram_story_person
      # Skip writes when already in sync.
      next if face.role.to_s == face.instagram_story_person.role.to_s

      face.update_columns(role: face.instagram_story_person.role.to_s, updated_at: Time.current)
    end
  when "story"
    source.instagram_story_faces.includes(:instagram_story_person).find_each do |face|
      next unless face.instagram_story_person
      next if face.role.to_s == face.instagram_story_person.role.to_s

      face.update_columns(role: face.instagram_story_person.role.to_s, updated_at: Time.current)
    end
  end
rescue StandardError
  nil
end
# Returns up to 20 normalized usernames linked to a person via metadata.
def linked_usernames(person)
  meta = person.metadata
  source = meta.is_a?(Hash) ? meta : {}
  Array(source["linked_usernames"])
    .map { |raw| normalize_username(raw) }
    .reject(&:blank?)
    .uniq
    .first(20)
end
# Maps a co-appearance count to a relationship tier label.
def relationship_for_coappearance(count)
  if count >= VERY_FREQUENT_COLLABORATOR_CO_APPEARANCES
    "very_frequent_collaborator"
  elsif count >= FREQUENT_COLLABORATOR_CO_APPEARANCES
    "frequent_collaborator"
  elsif count.positive?
    "occasional_collaborator"
  else
    "unknown"
  end
end
# Extracts up to 20 distinct lowercase tokens (length >= 3, stopwords
# removed) from the profile's display name and bio for context matching.
def bio_context_tokens(profile:)
  corpus = [ profile.display_name, profile.bio ].join(" ").downcase
  return [] if corpus.blank?

  ignore = %w[the and for with this that from your our you are]
  tokens = corpus.scan(/[a-z0-9_]+/)
  tokens
    .reject { |token| token.length < 3 || ignore.include?(token) }
    .uniq
    .first(20)
end
# The neutral primary-identity result used when no person can be promoted.
# Returns a fresh hash on every call so callers may mutate it safely.
def empty_primary_identity
  {
    person_id: nil,
    label: nil,
    role: "unknown",
    confirmed: false,
    appearance_count: 0,
    total_faces: 0,
    dominance_ratio: 0.0,
    linked_usernames: [],
    bio_context: []
  }
end
# Normalizes arbitrary input (handles, profile URLs, decorated mentions)
# into a bare lowercase Instagram username, or nil when nothing valid
# remains (blank, reserved word, or out of the 2..30 length range).
def normalize_username(value)
  token = value.to_s.strip.downcase
  return nil if token.blank?

  # Strip profile-URL prefixes and mention/hashtag decorations down to the
  # bare handle, then drop any characters Instagram handles cannot contain.
  token = token
    .sub(%r{https?://(www\.)?instagram\.com/}i, "")
    .split("/").first.to_s
    .delete_prefix("@")
    .delete_prefix("#")
    .delete_suffix(":")
    .gsub(/[^a-z0-9._]/, "")

  return nil if token.blank? || RESERVED_USERNAMES.include?(token)
  return nil unless token.length.between?(2, 30)

  token
end
# Heuristic: does this token look like an Instagram username? Requires the
# allowed character set and length, rejects reserved words and URLs, and
# demands at least one "_" or "." to reduce plain-word false positives.
def username_like_token?(token)
  candidate = token.to_s.downcase
  return false unless candidate.match?(/\A[a-z0-9._]{3,30}\z/)
  return false if RESERVED_USERNAMES.include?(candidate)
  return false if candidate.include?("instagram.com")

  candidate.include?("_") || candidate.include?(".")
end
end
-
1
module Instagram
  # Raised when the Instagram session is no longer authenticated and a
  # (manual) login is required before the operation can continue.
  AuthenticationRequiredError = Class.new(StandardError)
end
require "cgi"
-
require "uri"
-
-
module Instagram
  # Normalizes raw avatar URL strings scraped from Instagram markup into
  # absolute http(s) URLs, rejecting relative paths and known placeholder
  # (default-avatar) images. Returns nil when no usable URL remains.
  #
  # Fix: previously depended on ActiveSupport's String#blank? in an
  # otherwise dependency-free utility; operands here are plain strings, so
  # stdlib empty? is equivalent after strip/to_s.
  class AvatarUrlNormalizer
    # Path fragments identifying Instagram's default/placeholder avatars.
    PLACEHOLDER_PATTERNS = [
      /\/static\/images\/profile\//i,
      /profile-pic-null/i,
      /default[_-]?profile/i
    ].freeze

    class << self
      # @param raw_url [String, nil] possibly HTML-escaped, protocol-relative,
      #   or bare-host URL
      # @return [String, nil] canonical absolute URL, or nil when invalid
      def normalize(raw_url)
        url = CGI.unescapeHTML(raw_url.to_s).strip
        return nil if url.empty?

        if url.start_with?("//")
          # Protocol-relative URL: assume https.
          url = "https:#{url}"
        elsif url.start_with?("/")
          # Site-relative path: no host to resolve against.
          return nil
        elsif !url.match?(%r{\Ahttps?://}i)
          # Bare host ("cdn.example.com/x.jpg"): require a plausible domain.
          return nil unless url.match?(%r{\A[a-z0-9.-]+\.[a-z]{2,}([/:]|$)}i)

          url = "https://#{url}"
        end

        uri = URI.parse(url)
        # URI::HTTPS subclasses URI::HTTP, so this accepts both schemes.
        return nil unless uri.is_a?(URI::HTTP)
        return nil if uri.host.to_s.empty?
        return nil if placeholder_path?(uri.path.to_s)

        uri.to_s
      rescue URI::InvalidURIError, ArgumentError
        nil
      end

      # True when the path matches any known placeholder-avatar pattern.
      def placeholder_path?(path)
        normalized = path.to_s.downcase
        return false if normalized.empty?

        PLACEHOLDER_PATTERNS.any? { |pattern| normalized.match?(pattern) }
      end
    end
  end
end
require "selenium-webdriver"
-
require "fileutils"
-
require "time"
-
require "net/http"
-
require "json"
-
require "cgi"
-
require "base64"
-
require "digest"
-
require "stringio"
-
require "set"
-
-
module Instagram
  # Selenium-backed Instagram automation client bound to a single account.
  # Behavior is composed from service mixins (story scraping, feed
  # engagement, DMs, comment posting, follow-graph/profile/feed fetching,
  # session recovery); this class wires them together around one account.
  class Client
    include StoryScraperService
    include FeedEngagementService
    include BrowserAutomation
    include DirectMessagingService
    include CommentPostingService
    include FollowGraphFetchingService
    include ProfileFetchingService
    include FeedFetchingService
    include SyncCollectionSupport
    include StoryApiSupport
    include CoreHelpers
    include TaskCaptureSupport
    include SessionRecoverySupport

    INSTAGRAM_BASE_URL = "https://www.instagram.com".freeze
    # Where raw page captures are written when debugging failed tasks.
    DEBUG_CAPTURE_DIR = Rails.root.join("log", "instagram_debug").freeze
    # NOTE(review): presumably the cool-down before retrying a story
    # interaction; confirm against usages outside this chunk.
    STORY_INTERACTION_RETRY_DAYS = 3
    # Paging limits for profile feed collection (API page size, max pages,
    # and the per-run cap when falling back to browser scraping).
    PROFILE_FEED_PAGE_SIZE = 30
    PROFILE_FEED_MAX_PAGES = 120
    PROFILE_FEED_BROWSER_ITEM_CAP = 500
# @param account [InstagramAccount] the account this client operates as;
#   all fetching/engagement happens under this account's session.
def initialize(account:)
  @account = account
end
# Opens a visible (non-headless) browser on the Instagram login page and
# waits up to timeout_seconds for a human to complete the login, then
# persists the session bundle and marks the account authenticated.
def manual_login!(timeout_seconds: 180)
  with_driver(headless: false) do |driver|
    driver.navigate.to("#{INSTAGRAM_BASE_URL}/accounts/login/")
    wait_for_manual_login!(driver: driver, timeout_seconds: timeout_seconds)

    persist_session_bundle!(driver)
    @account.login_state = "authenticated"
    @account.save!
  end
end
# Delegates session validation to SessionValidationService, handing it
# bound references to this client's browser helpers so the service stays
# decoupled from the driver lifecycle.
def validate_session!
  SessionValidationService.new(
    account: @account,
    with_driver: method(:with_driver),
    wait_for: method(:wait_for),
    logger: defined?(Rails) ? Rails.logger : nil
  ).call
end
# Assembles a posts/comments dataset for profile analysis by delegating to
# ProfileAnalysisDatasetService with bound fetch/extract helpers.
#
# @param posts_limit [Integer, nil] max posts to include (nil = service default)
# @param comments_limit [Integer] max comments per post
def fetch_profile_analysis_dataset!(username:, posts_limit: nil, comments_limit: 8)
  ProfileAnalysisDatasetService.new(
    fetch_profile_details: method(:fetch_profile_details!),
    fetch_web_profile_info: method(:fetch_web_profile_info),
    fetch_profile_feed_items_for_analysis: method(:fetch_profile_feed_items_for_analysis),
    extract_post_for_analysis: method(:extract_post_for_analysis),
    enrich_missing_post_comments_via_browser: method(:enrich_missing_post_comments_via_browser!),
    normalize_username: method(:normalize_username)
  ).call(username: username, posts_limit: posts_limit, comments_limit: comments_limit)
end
# Assembles a story dataset for a profile by delegating to
# ProfileStoryDatasetService with bound fetch/extract helpers.
#
# @param stories_limit [Integer] max stories to include
def fetch_profile_story_dataset!(username:, stories_limit: 20)
  ProfileStoryDatasetService.new(
    fetch_profile_details: method(:fetch_profile_details!),
    fetch_web_profile_info: method(:fetch_web_profile_info),
    fetch_story_reel: method(:fetch_story_reel),
    extract_story_item: method(:extract_story_item),
    normalize_username: method(:normalize_username)
  ).call(username: username, stories_limit: stories_limit)
end
private
-
-
# Auto-engages with the first available story user from the account's story
# tray (resolved via API): downloads each story's media, records events,
# generates an LLM comment suggestion, and posts it via API with a UI
# fallback through the provided driver.
#
# Fix: removed a dead duplicate guard (`return result if username.blank?`)
# that immediately followed the identical blank-username branch which had
# already returned.
#
# @param driver [Selenium::WebDriver] active browser session (UI fallback)
# @param story_hold_seconds [Integer] seconds to linger after a posted reply
# @return [Hash] outcome: :attempted, :replied, :replied_count, :username,
#   :story_ref, :processed_stories, plus optional skip flags/reasons
def auto_engage_first_story!(driver:, story_hold_seconds:)
  result = { attempted: false, replied: false, replied_count: 0, username: nil, story_ref: nil, processed_stories: 0 }

  username = fetch_story_users_via_api.keys.first.to_s
  if username.blank?
    result[:reply_skipped] = true
    result[:reply_skip_reason] = "api_story_users_unavailable"
    return result
  end

  result[:attempted] = true
  result[:username] = username

  # Only engage profiles already in the account's follow network.
  profile = find_story_network_profile(username: username)
  unless profile
    capture_task_html(
      driver: driver,
      task_name: "auto_engage_story_out_of_network_skipped",
      status: "ok",
      meta: { username: username, reason: "profile_not_in_network" }
    )
    result[:reply_skipped] = true
    result[:reply_skip_reason] = "profile_not_in_network"
    return result
  end

  story_items = fetch_story_items_via_api(username: username)
  if story_items.blank?
    result[:reply_skipped] = true
    result[:reply_skip_reason] = "no_story_items"
    return result
  end

  story_items.each do |story|
    story_id = story[:story_id].to_s
    next if story_id.blank?

    result[:processed_stories] += 1
    story_ref = "#{username}:#{story_id}"
    result[:story_ref] ||= story_ref

    # Respect API-provided hints that this story should not be engaged.
    if ActiveModel::Type::Boolean.new.cast(story[:api_should_skip])
      result[:reply_skipped] = true
      result[:reply_skip_reason] = story[:api_external_profile_reason].to_s.presence || "api_external_profile_indicator"
      next
    end

    can_reply = story[:can_reply]
    if can_reply == false
      result[:reply_skipped] = true
      result[:reply_skip_reason] = "api_can_reply_false"
      next
    end

    media_url = story[:media_url].to_s
    next if media_url.blank?

    # Archive the story media and record the download as a profile event.
    download = download_media_with_metadata(url: media_url, user_agent: @account.user_agent)
    downloaded_at = Time.current
    downloaded_event = profile.record_event!(
      kind: "story_media_downloaded_via_feed",
      external_id: "story_media_downloaded_via_feed:#{story_ref}:#{downloaded_at.utc.iso8601(6)}",
      occurred_at: downloaded_at,
      metadata: {
        source: "selenium_story_viewer",
        media_source: "api_story_item",
        media_type: story[:media_type],
        username: username,
        story_id: story_id,
        story_ref: story_ref,
        download_link: media_url,
        media_size_bytes: download[:bytes].bytesize,
        content_type: download[:content_type],
        final_url: download[:final_url]
      }
    )
    downloaded_event.media.attach(
      io: StringIO.new(download[:bytes]),
      filename: download[:filename],
      content_type: download[:content_type]
    )
    InstagramProfileEvent.broadcast_story_archive_refresh!(account: @account)

    # Analyze the media and generate comment suggestions.
    payload = build_auto_engagement_post_payload(
      profile: profile,
      shortcode: story_ref,
      caption: story[:caption],
      permalink: story[:permalink].to_s.presence || "#{INSTAGRAM_BASE_URL}/stories/#{username}/#{story_id}/",
      include_story_history: true
    )
    analysis = analyze_for_auto_engagement!(
      analyzable: downloaded_event,
      payload: payload,
      bytes: download[:bytes],
      content_type: download[:content_type],
      source_url: media_url
    )
    suggestions = generate_comment_suggestions_from_analysis!(
      profile: profile,
      payload: payload,
      analysis: analysis
    )
    comment_text = suggestions.first.to_s.strip
    next if comment_text.blank?

    # Prefer posting via the API; fall back to driving the story viewer UI.
    comment_result = comment_on_story_via_api!(story_id: story_id, story_username: username, comment_text: comment_text)
    if !comment_result[:posted]
      driver.navigate.to("#{INSTAGRAM_BASE_URL}/stories/#{username}/#{story_id}/")
      wait_for(driver, css: "body", timeout: 12)
      dismiss_common_overlays!(driver)
      freeze_story_progress!(driver)
      comment_result = comment_on_story_via_ui!(driver: driver, comment_text: comment_text)
    end
    posted = comment_result[:posted]
    # Linger on the story after a successful reply to look human.
    sleep(story_hold_seconds.to_i) if posted

    if posted
      result[:replied] = true
      result[:replied_count] = result[:replied_count].to_i + 1
      profile.record_event!(
        kind: "story_comment_posted_via_feed",
        external_id: "story_comment_posted_via_feed:#{story_ref}:#{Time.current.utc.iso8601(6)}",
        occurred_at: Time.current,
        metadata: {
          source: "selenium_story_viewer",
          username: username,
          story_id: story_id,
          story_ref: story_ref,
          comment_text: comment_text,
          submission_method: comment_result[:method],
          analysis: analysis
        }
      )
      attach_reply_comment_to_downloaded_event!(downloaded_event: downloaded_event, comment_text: comment_text)
    end
  rescue StandardError
    # Best-effort per story: any failure moves on to the next item.
    next
  end

  result
rescue StandardError => e
  capture_task_html(
    driver: driver,
    task_name: "auto_engage_story_failed",
    status: "error",
    meta: { error_class: e.class.name, error_message: e.message }
  )
  result
end
# Auto-engages with a single home-feed post: downloads its media, records
# the download event, runs analysis + comment generation, posts the top
# suggestion via the UI, and records the outcome as a profile event.
#
# @param driver [Selenium::WebDriver] active browser session
# @param item [Hash] extracted feed item (:shortcode, :author_username,
#   :media_url, :caption, :metadata)
# @return [Hash] :shortcode, :username, :comment_posted, :posted_comment
def auto_engage_feed_post!(driver:, item:)
  shortcode = item[:shortcode].to_s
  username = normalize_username(item[:author_username].to_s)
  profile = find_or_create_profile_for_auto_engagement!(username: username)

  capture_task_html(
    driver: driver,
    task_name: "auto_engage_post_selected",
    status: "ok",
    meta: { shortcode: shortcode, username: username, media_url: item[:media_url] }
  )

  # Archive the post media and record the download as a profile event.
  download = download_media_with_metadata(url: item[:media_url], user_agent: @account.user_agent)
  downloaded_at = Time.current
  downloaded_event = profile.record_event!(
    kind: "feed_post_image_downloaded",
    external_id: "feed_post_image_downloaded:#{shortcode}:#{downloaded_at.utc.iso8601(6)}",
    occurred_at: downloaded_at,
    metadata: {
      source: "selenium_home_feed",
      shortcode: shortcode,
      download_link: item[:media_url],
      original_image_size_bytes: download[:bytes].bytesize,
      original_image_width: item.dig(:metadata, :natural_width),
      original_image_height: item.dig(:metadata, :natural_height),
      content_type: download[:content_type],
      final_url: download[:final_url]
    }
  )
  downloaded_event.media.attach(
    io: StringIO.new(download[:bytes]),
    filename: download[:filename],
    content_type: download[:content_type]
  )

  # Analyze the media and generate comment suggestions.
  payload = build_auto_engagement_post_payload(
    profile: profile,
    shortcode: shortcode,
    caption: item[:caption],
    permalink: "#{INSTAGRAM_BASE_URL}/p/#{shortcode}/",
    include_story_history: false
  )
  analysis = analyze_for_auto_engagement!(
    analyzable: downloaded_event,
    payload: payload,
    bytes: download[:bytes],
    content_type: download[:content_type],
    source_url: item[:media_url]
  )
  suggestions = generate_comment_suggestions_from_analysis!(
    profile: profile,
    payload: payload,
    analysis: analysis
  )

  comment_text = suggestions.first.to_s.strip
  posted = comment_text.present? && comment_on_post_via_ui!(driver: driver, shortcode: shortcode, comment_text: comment_text)

  # Outcome is recorded even when nothing was posted (posted: false).
  profile.record_event!(
    kind: "feed_post_comment_posted",
    external_id: "feed_post_comment_posted:#{shortcode}:#{Time.current.utc.iso8601(6)}",
    occurred_at: Time.current,
    metadata: {
      source: "selenium_home_feed",
      shortcode: shortcode,
      username: username,
      posted: posted,
      posted_comment: comment_text,
      generated_suggestions: suggestions.first(8),
      analysis: analysis
    }
  )

  {
    shortcode: shortcode,
    username: username,
    comment_posted: posted,
    posted_comment: comment_text
  }
end
# Finds or creates the account-scoped profile record for a feed author.
# Raises when the username normalizes to blank (nothing to engage with).
def find_or_create_profile_for_auto_engagement!(username:)
  handle = normalize_username(username)
  raise "Feed item username is missing" if handle.blank?

  @account.instagram_profiles.find_or_create_by!(username: handle) do |new_profile|
    # Seed display name with the handle; messaging capability is unknown.
    new_profile.display_name = handle
    new_profile.can_message = nil
  end
end
# Looks up a profile by username, restricted to the account's follow
# network (we follow them OR they follow us). Returns nil when not found,
# on a blank username, or on any error (deliberate best-effort).
def find_story_network_profile(username:)
  normalized = normalize_username(username)
  return nil if normalized.blank?

  @account.instagram_profiles
    .where(username: normalized)
    .where("following = ? OR follows_you = ?", true, true)
    .first
rescue StandardError
  nil
end
# Looks up any account-scoped profile by normalized username (no network
# restriction). Returns nil when not found or on any error (best-effort).
def find_profile_for_interaction(username:)
  handle = normalize_username(username)
  return nil if handle.blank?

  scope = @account.instagram_profiles.where(username: handle)
  scope.first
rescue StandardError
  nil
end
# Whether auto-reply is enabled for a profile. Prefers the profile's own
# predicate when defined; otherwise falls back to checking for any of the
# known auto-reply tag name variants.
def profile_auto_reply_enabled?(profile)
  return profile.auto_reply_enabled? if profile.respond_to?(:auto_reply_enabled?)

  tag_names = [ "automatic_reply", "automatic reply", "auto_reply", "auto reply" ]
  profile.profile_tags.where(name: tag_names).exists?
end
# True-ish when the probed DOM snapshot reports an active story viewer.
# Non-hash input (failed probe) is treated as not ready.
def story_viewer_ready?(dom)
  return false unless dom.is_a?(Hash)

  dom[:story_viewer_active]
end
# Locates the best clickable element on the home page for opening the story
# tray, using several in-page JS strategies (story links, aria labels,
# story containers, then an ultra-fallback over generic clickables). The
# chosen element is tagged with a data attribute so it can be re-found via
# Selenium; the tag is always cleaned up in ensure.
#
# @param excluded_usernames [Array<String>] usernames whose stories must
#   not be opened (matched against element text/hrefs)
# @return [Hash] :element (Selenium element or nil), :count, :strategy,
#   and :debug diagnostics from the page probe
def find_home_story_open_target(driver, excluded_usernames: [])
  # First, try to capture the current page state for debugging
  page_debug = driver.execute_script(<<~JS)
    return {
      url: window.location.href,
      title: document.title,
      storyLinks: document.querySelectorAll("a[href*='/stories/']").length,
      storyButtons: document.querySelectorAll("[aria-label*='story' i]").length,
      allButtons: document.querySelectorAll("button, [role='button']").length,
      allLinks: document.querySelectorAll("a").length,
      bodyText: document.body.innerText.slice(0, 500),
      hasStoryTray: !!document.querySelector('[data-testid*="story"], [class*="story"], [id*="story"]')
    };
  JS

  # Run the candidate search in-page; arguments[0] is the exclusion list,
  # arguments[1] the debug snapshot (echoed back in the result).
  payload = driver.execute_script(<<~JS, excluded_usernames, page_debug)
    const excluded = Array.isArray(arguments[0]) ? arguments[0].map((u) => (u || "").toString().toLowerCase()).filter(Boolean) : [];
    const isVisible = (el) => {
      if (!el) return false;
      const s = window.getComputedStyle(el);
      if (!s || s.display === "none" || s.visibility === "hidden" || s.opacity === "0" || s.pointerEvents === "none") return false;
      const r = el.getBoundingClientRect();
      return r.width > 5 && r.height > 5 && r.bottom > 0 && r.right > 0;
    };
    const isExcluded = (text, href) => excluded.some((u) => text.includes(u) || href.includes(`/${u}/`));

    const candidates = [];
    const add = (el, strategy) => {
      if (!el) return;
      try {
        if (!isVisible(el)) return;
        const r = el.getBoundingClientRect();
        const topZone = r.top >= 0 && r.top < Math.max(760, window.innerHeight * 0.85);
        if (!topZone) return;
        const text = (el.getAttribute("aria-label") || el.textContent || "").toLowerCase();
        const href = (el.getAttribute("href") || "").toLowerCase();
        const liveHost = el.closest("a[href*='/live/'], [href*='/live/']");
        if (text.includes("your story")) return;
        if (text.includes("live") || href.includes("/live/") || liveHost) return;
        if (isExcluded(text, href)) return;
        candidates.push({ el, strategy, top: r.top, left: r.left, w: r.width, h: r.height, text: text.slice(0, 50), href: href.slice(0, 50) });
      } catch (e) {
        // Skip problematic elements
      }
    };

    // Aggressive story detection with multiple fallback strategies
    document.querySelectorAll("a[href*='/stories/']").forEach((el) => add(el, "href_story_link"));
    document.querySelectorAll("button[aria-label*='story' i], [role='button'][aria-label*='story' i], a[aria-label*='story' i]").forEach((el) => add(el, "aria_story_button"));
    document.querySelectorAll("[data-testid*='story'], [class*='story'], [id*='story']").forEach((container) => {
      try {
        container.querySelectorAll("a, button, [role='button'], [class*='avatar'], [class*='profile']").forEach((el) => add(el, "container_story_element"));
      } catch (e) {}
    });

    // Ultra-fallback: any clickable element that might be a story
    if (candidates.length === 0) {
      document.querySelectorAll("a[href*='/'], button, [role='button']").forEach((el) => {
        try {
          const text = (el.getAttribute("aria-label") || el.textContent || "").toLowerCase();
          const href = (el.getAttribute("href") || "").toLowerCase();
          if (text.includes("story") || href.includes("story") || (text && text.length > 0 && text.length < 50)) {
            add(el, "ultra_fallback");
          }
        } catch (e) {}
      });
    }

    candidates.sort((a, b) => (a.top - b.top) || (a.left - b.left));
    const chosen = candidates[0];
    if (!chosen) return { found: false, count: 0, strategy: "none", debug: { candidates: candidates.length, totalStoryLinks: document.querySelectorAll("a[href*='/stories/']").length, totalStoryButtons: document.querySelectorAll("[aria-label*='story' i]").length, pageDebug: arguments[1] } };

    try { chosen.el.setAttribute("data-codex-story-open", "1"); } catch (e) {}
    return { found: true, count: candidates.length, strategy: chosen.strategy, debug: { candidates: candidates.length, chosenStrategy: chosen.strategy, chosenText: chosen.text, chosenHref: chosen.href, pageDebug: arguments[1] } };
  JS

  # Re-locate the tagged element through Selenium so callers can click it.
  el = nil
  if payload.is_a?(Hash) && payload["found"]
    begin
      el = driver.find_element(css: "[data-codex-story-open='1']")
    rescue StandardError
      el = nil
    end
  end

  {
    element: el,
    count: payload.is_a?(Hash) ? payload["count"].to_i : 0,
    strategy: payload.is_a?(Hash) ? payload["strategy"].to_s : "none",
    debug: payload.is_a?(Hash) ? payload["debug"] : {}
  }
ensure
  # Always remove the marker attribute so repeated probes stay clean.
  begin
    driver.execute_script("const el=document.querySelector('[data-codex-story-open=\"1\"]'); if (el) el.removeAttribute('data-codex-story-open');")
  rescue StandardError
    nil
  end
end
# Probes the home page for the story carousel: captures a page-state debug
# snapshot, finds visible story anchors, resolves a clickable open target,
# and extracts usernames prefetched into the page HTML. Errors return an
# empty result with the error message in :debug (best-effort).
#
# @return [Hash] :anchor, :target, :target_count, :target_strategy,
#   :anchor_count, :prefetch_count, :prefetch_usernames, :debug, :page_debug
def detect_home_story_carousel_probe(driver, excluded_usernames: [])
  # Force capture page state on every probe for debugging
  page_debug = driver.execute_script(<<~JS)
    return {
      url: window.location.href,
      title: document.title,
      storyLinks: document.querySelectorAll("a[href*='/stories/']").length,
      storyButtons: document.querySelectorAll("[aria-label*='story' i]").length,
      allButtons: document.querySelectorAll("button, [role='button']").length,
      allLinks: document.querySelectorAll("a").length,
      bodyText: document.body.innerText.slice(0, 1000),
      hasStoryTray: !!document.querySelector('[data-testid*="story"], [class*="story"], [id*="story"]'),
      htmlLength: document.documentElement.outerHTML.length,
      readyState: document.readyState,
      visibleElements: Array.from(document.querySelectorAll('*')).filter(el => {
        try {
          const rect = el.getBoundingClientRect();
          return rect.width > 0 && rect.height > 0 && rect.top >= 0 && rect.top < window.innerHeight;
        } catch(e) { return false; }
      }).length
    };
  JS

  # Always capture debug info
  Rails.logger.info "Story carousel probe debug: #{page_debug.inspect}" if defined?(Rails)

  anchors = driver.find_elements(css: "a[href*='/stories/']")
  # Prefer a displayed anchor; fall back to the first one regardless.
  visible_anchor = anchors.find { |el| el.displayed? rescue false } || anchors.first
  target = find_home_story_open_target(driver, excluded_usernames: excluded_usernames)

  html = driver.page_source.to_s
  Rails.logger.info "HTML length: #{html.length}, contains stories pattern: #{html.include?('stories')}" if defined?(Rails)
  prefetch_users = extract_story_users_from_home_html(html)

  result = {
    anchor: visible_anchor,
    target: target[:element],
    target_count: target[:count].to_i,
    target_strategy: target[:strategy].to_s.presence || "none",
    anchor_count: anchors.length,
    prefetch_count: prefetch_users.length,
    prefetch_usernames: prefetch_users.take(12),
    debug: target[:debug] || {},
    page_debug: page_debug
  }

  Rails.logger.info "Carousel probe result: #{result.inspect}" if defined?(Rails)
  result
rescue StandardError => e
  Rails.logger.error "Carousel probe error: #{e.message}" if defined?(Rails)
  { anchor: nil, target: nil, target_count: 0, target_strategy: "none", anchor_count: 0, prefetch_count: 0, prefetch_usernames: [], debug: { error: e.message } }
end
# Clicks the best home-feed story tray entry entirely via injected JavaScript,
# bypassing Selenium element interaction. Candidates are story links/buttons in
# the upper page zone, excluding "your story", live entries, and any username in
# +excluded_usernames+; the top-left-most candidate wins.
#
# @param driver [Selenium::WebDriver] active browser session
# @param excluded_usernames [Array<String>] usernames to skip
# @return [Hash] { clicked: Boolean, count: Integer, strategy: String }
def click_home_story_open_target_via_js(driver, excluded_usernames: [])
  payload = driver.execute_script(<<~JS, excluded_usernames)
    const excluded = Array.isArray(arguments[0]) ? arguments[0].map((u) => (u || "").toString().toLowerCase()).filter(Boolean) : [];
    const isVisible = (el) => {
      if (!el) return false;
      const s = window.getComputedStyle(el);
      if (!s || s.display === "none" || s.visibility === "hidden" || s.pointerEvents === "none") return false;
      const r = el.getBoundingClientRect();
      return r.width > 18 && r.height > 18 && r.bottom > 0 && r.right > 0;
    };
    const isExcluded = (text, href) => excluded.some((u) => text.includes(u) || href.includes(`/${u}/`));

    const clickEl = (el) => {
      try { el.scrollIntoView({ block: "center", inline: "center" }); } catch (e) {}
      const evt = { view: window, bubbles: true, cancelable: true, composed: true, button: 0 };
      ["pointerdown", "mousedown", "mouseup", "click"].forEach((type) => {
        try { el.dispatchEvent(new MouseEvent(type, evt)); } catch (e) {}
      });
      try { el.click(); } catch (e) {}
      return true;
    };

    const candidates = [];
    const add = (el, strategy) => {
      if (!isVisible(el)) return;
      const r = el.getBoundingClientRect();
      const topZone = r.top >= 0 && r.top < Math.max(760, window.innerHeight * 0.85);
      if (!topZone) return;
      const text = (el.getAttribute("aria-label") || el.textContent || "").toLowerCase();
      const href = (el.getAttribute("href") || "").toLowerCase();
      const liveHost = el.closest("a[href*='/live/'], [href*='/live/']");
      if (text.includes("your story")) return;
      if (text.includes("live") || href.includes("/live/") || liveHost) return;
      if (isExcluded(text, href)) return;
      candidates.push({ el, strategy, top: r.top, left: r.left });
    };

    document.querySelectorAll("a[href*='/stories/']").forEach((el) => add(el, "href_story_link"));
    document.querySelectorAll("button[aria-label*='story' i], [role='button'][aria-label*='story' i], a[aria-label*='story' i]").forEach((el) => add(el, "aria_story_button"));

    candidates.sort((a, b) => (a.top - b.top) || (a.left - b.left));
    const chosen = candidates[0];
    if (!chosen) return { clicked: false, count: 0, strategy: "none" };

    clickEl(chosen.el);
    return { clicked: true, count: candidates.length, strategy: chosen.strategy };
  JS

  # execute_script may return nil or a non-Hash; normalize defensively.
  {
    clicked: payload.is_a?(Hash) && payload["clicked"] == true,
    count: payload.is_a?(Hash) ? payload["count"].to_i : 0,
    strategy: payload.is_a?(Hash) ? payload["strategy"].to_s : "none"
  }
rescue StandardError
  # Best-effort: any WebDriver/JS failure is reported as "not clicked".
  { clicked: false, count: 0, strategy: "none" }
end
-
-
# Fallback story opener: navigates directly to /stories/<username>/ for each
# prefetched candidate username (max 8) until a ready story viewer is detected.
# Records a capture_task_html snapshot for the first success, or one error
# snapshot after exhausting all candidates.
#
# @return [Boolean] true when a story viewer was opened
def open_story_from_prefetch_usernames(driver:, usernames:, attempts:, probe:)
  # Normalize, drop blanks, dedupe, and cap the candidate list.
  candidates = Array(usernames).map { |u| normalize_username(u) }.reject(&:blank?).uniq.take(8)
  return false if candidates.empty?

  candidates.each_with_index do |normalized, idx|
    begin
      driver.navigate.to("#{INSTAGRAM_BASE_URL}/stories/#{normalized}/")
      wait_for(driver, css: "body", timeout: 12)

      # Story viewers render asynchronously; poll the DOM a few times.
      4.times do
        sleep(0.6)
        dom = extract_story_dom_context(driver)
        if story_viewer_ready?(dom)
          capture_task_html(
            driver: driver,
            task_name: "home_story_sync_first_story_opened_prefetch_route",
            status: "ok",
            meta: {
              strategy: "prefetch_username_route",
              username: normalized,
              candidate_index: idx,
              candidate_count: candidates.length,
              attempts: attempts,
              target_count: probe[:target_count],
              anchor_count: probe[:anchor_count],
              prefetch_story_usernames: probe[:prefetch_count]
            }
          )
          return true
        end
      end
    rescue StandardError
      # Best-effort per candidate: swallow and try the next username.
      nil
    end
  end

  # No candidate produced a ready viewer; record the failure for triage.
  capture_task_html(
    driver: driver,
    task_name: "home_story_sync_first_story_opened_prefetch_route",
    status: "error",
    meta: {
      strategy: "prefetch_username_route",
      attempts: attempts,
      target_count: probe[:target_count],
      anchor_count: probe[:anchor_count],
      prefetch_story_usernames: probe[:prefetch_count],
      usernames_tried: candidates
    }
  )
  false
end
-
-
-
# Builds the current story context from the browser URL plus DOM probes, with
# layered fallbacks: URL ref -> og:url meta ref -> profile-like path recovery.
# Statement order matters here: the profile-preview invalidation below must run
# AFTER the fallback recovery so +recovery_needed+ can still be flagged.
#
# @return [Hash] ref/username/story_id/url/story_url_recovery_needed/
#   story_viewer_active/story_key/media_signature
def current_story_context(driver)
  url = driver.current_url.to_s
  ref = current_story_reference(url)
  username = ref.to_s.split(":").first.to_s
  story_id = ref.to_s.split(":")[1].to_s
  dom = extract_story_dom_context(driver)

  # Fallback 1: derive the reference from the og:url meta tag.
  if ref.blank? && dom[:og_story_url].present?
    ref = current_story_reference(dom[:og_story_url])
    username = ref.to_s.split(":").first.to_s if username.blank?
    story_id = ref.to_s.split(":")[1].to_s if story_id.blank?
  end

  # Fallback 2: recover a username from a profile-like path; mark recovery
  # needed when the viewer is active but no story frame is rendered.
  recovery_needed = false
  if ref.blank?
    fallback_username = extract_username_from_profile_like_path(url)
    if fallback_username.present?
      username = fallback_username
      ref = "#{fallback_username}:#{story_id.presence || 'unknown'}"
      recovery_needed = dom[:story_viewer_active] && !dom[:story_frame_present]
    end
  end
  if dom[:story_viewer_active] && !dom[:story_frame_present]
    # Do not treat profile-preview-like pages as valid story context.
    ref = ""
    story_id = ""
  end
  username = dom[:meta_username].to_s if username.blank? && dom[:meta_username].present?

  # Dedupe key: prefer username:story_id, then username:sig:<media>, then ref.
  media_signature = dom[:media_signature].to_s
  key = if username.present? && story_id.present?
    "#{username}:#{story_id}"
  elsif username.present? && media_signature.present?
    "#{username}:sig:#{media_signature}"
  else
    ref
  end

  {
    ref: ref,
    username: normalize_username(username),
    story_id: story_id,
    url: url,
    story_url_recovery_needed: recovery_needed,
    story_viewer_active: dom[:story_viewer_active],
    story_key: key,
    media_signature: media_signature
  }
end
-
-
# Refreshes a story context hash against the live browser URL: the live ref
# (when present) overrides username/story_id, all tokens are re-normalized, and
# the canonical story URL is recomputed.
#
# @param context [Hash] prior context (non-Hash values start from empty)
# @return [Hash] the updated copy; on any error the original context unchanged
def normalized_story_context_for_processing(driver:, context:)
  ctx = context.is_a?(Hash) ? context.dup : {}
  live_url = driver.current_url.to_s
  live_ref = current_story_reference(live_url)
  if live_ref.present?
    live_username = normalize_username(live_ref.to_s.split(":").first.to_s)
    live_story_id = normalize_story_id_token(live_ref.to_s.split(":")[1].to_s)
    ctx[:ref] = live_ref
    ctx[:username] = live_username if live_username.present?
    ctx[:story_id] = live_story_id if live_story_id.present?
  end

  ctx[:username] = normalize_username(ctx[:username])
  ctx[:story_id] = normalize_story_id_token(ctx[:story_id])
  # Keep ref and story_key in lockstep once both identity tokens are known.
  if ctx[:username].present? && ctx[:story_id].present?
    ctx[:ref] = "#{ctx[:username]}:#{ctx[:story_id]}"
    ctx[:story_key] = "#{ctx[:username]}:#{ctx[:story_id]}"
  end
  ctx[:url] = canonical_story_url(username: ctx[:username], story_id: ctx[:story_id], fallback_url: live_url)
  ctx
rescue StandardError
  # Fall back to the caller-provided context untouched.
  context
end
-
-
# Recovers a lost story viewer by re-navigating to /stories/<username>/,
# dismissing overlays and freezing story progress, then snapshots the outcome.
# Never raises: failures are captured via capture_task_html and swallowed.
#
# @param reason [String] diagnostic tag recorded in the snapshot metadata
def recover_story_url_context!(driver:, username:, reason:)
  clean_username = normalize_username(username)
  return if clean_username.blank?

  path = "#{INSTAGRAM_BASE_URL}/stories/#{clean_username}/"
  driver.navigate.to(path)
  wait_for(driver, css: "body", timeout: 12)
  dismiss_common_overlays!(driver)
  freeze_story_progress!(driver)
  capture_task_html(
    driver: driver,
    task_name: "home_story_sync_story_context_recovered",
    status: "ok",
    meta: {
      reason: reason,
      username: clean_username,
      current_url: driver.current_url.to_s
    }
  )
rescue StandardError => e
  capture_task_html(
    driver: driver,
    task_name: "home_story_sync_story_context_recovery_failed",
    status: "error",
    meta: {
      reason: reason,
      username: clean_username,
      error_class: e.class.name,
      error_message: e.message
    }
  )
end
-
-
-
# Computes a lightweight fingerprint ("<title>|<media src>", max 400 chars) of
# the currently visible story media, used to distinguish stories when no
# numeric story id is available.
#
# @return [String] signature, or "" when nothing qualifies or JS fails
def visible_story_media_signature(driver)
  payload = driver.execute_script(<<~JS)
    const out = { media_signature: "", title: (document.title || "").toString() };
    const visible = (el) => {
      if (!el) return false;
      const style = window.getComputedStyle(el);
      if (!style || style.display === "none" || style.visibility === "hidden" || style.opacity === "0") return false;
      const r = el.getBoundingClientRect();
      return r.width > 120 && r.height > 120;
    };

    const mediaEl = Array.from(document.querySelectorAll("img,video")).find((el) => visible(el));
    const src = mediaEl ? (mediaEl.currentSrc || mediaEl.src || mediaEl.getAttribute("src") || "") : "";
    out.media_signature = [out.title, src].filter(Boolean).join("|").slice(0, 400);
    return out;
  JS

  payload.is_a?(Hash) ? payload["media_signature"].to_s : ""
rescue StandardError
  ""
end
-
-
# Probes the DOM for story-viewer signals: og:url meta, username parsed from
# that URL, whether a viewer appears active (path/title/og heuristics), whether
# a sufficiently large (>=220px) media frame is rendered, and a media signature.
#
# @return [Hash] og_story_url/meta_username/story_viewer_active/
#   story_frame_present/media_signature; {} when JS returns a non-Hash,
#   all-blank defaults on error
def extract_story_dom_context(driver)
  payload = driver.execute_script(<<~JS)
    const out = {
      og_story_url: "",
      meta_username: "",
      story_viewer_active: false,
      story_frame_present: false,
      media_signature: ""
    };
    const og = document.querySelector("meta[property='og:url']");
    const ogUrl = (og && og.content) ? og.content.toString() : "";
    if (ogUrl.includes("/stories/")) out.og_story_url = ogUrl;

    const path = window.location.pathname || "";
    if (path.includes("/stories/")) out.story_viewer_active = true;
    if ((document.title || "").toLowerCase().includes("story")) out.story_viewer_active = true;
    if (out.og_story_url) out.story_viewer_active = true;

    const match = out.og_story_url.match(/\\/stories\\/([A-Za-z0-9._]{1,30})/);
    if (match && match[1]) out.meta_username = match[1];

    const visible = (el) => {
      if (!el) return false;
      const style = window.getComputedStyle(el);
      if (!style || style.display === "none" || style.visibility === "hidden" || style.opacity === "0") return false;
      const r = el.getBoundingClientRect();
      return r.width > 120 && r.height > 120;
    };
    const mediaEl = Array.from(document.querySelectorAll("img,video")).find((el) => visible(el));
    const src = mediaEl ? (mediaEl.currentSrc || mediaEl.src || mediaEl.getAttribute("src") || "") : "";
    const rect = mediaEl ? mediaEl.getBoundingClientRect() : { width: 0, height: 0 };
    out.story_frame_present = Boolean(mediaEl && rect.width >= 220 && rect.height >= 220);
    out.media_signature = [document.title || "", src].filter(Boolean).join("|").slice(0, 400);
    return out;
  JS

  return {} unless payload.is_a?(Hash)

  {
    og_story_url: payload["og_story_url"].to_s,
    meta_username: payload["meta_username"].to_s,
    story_viewer_active: ActiveModel::Type::Boolean.new.cast(payload["story_viewer_active"]),
    story_frame_present: ActiveModel::Type::Boolean.new.cast(payload["story_frame_present"]),
    media_signature: payload["media_signature"].to_s
  }
rescue StandardError
  { og_story_url: "", meta_username: "", story_viewer_active: false, story_frame_present: false, media_signature: "" }
end
-
-
-
# Downloads a media asset over HTTP(S), following up to +redirect_limit+
# redirects, and returns its bytes plus derived metadata.
#
# Fixes vs. previous version: the connection is opened with `http.start { }`
# so the socket is always closed (it previously leaked on redirects and
# raises), and a relative `Location` header is resolved against the current
# URI via URI.join (it previously failed the URI::HTTP check and raised
# "Invalid media URL").
#
# @param url [String] absolute http(s) URL of the media
# @param user_agent [String, nil] UA header; falls back to "Mozilla/5.0"
# @param redirect_limit [Integer] remaining redirects to follow
# @return [Hash] { bytes:, content_type:, filename:, final_url: }
# @raise [RuntimeError] on invalid URL, HTTP failure, or empty body
def download_media_with_metadata(url:, user_agent:, redirect_limit: 3)
  uri = URI.parse(url.to_s)
  # URI::HTTPS subclasses URI::HTTP, so a single check covers both schemes.
  raise "Invalid media URL" unless uri.is_a?(URI::HTTP)

  req = Net::HTTP::Get.new(uri.request_uri)
  req["User-Agent"] = user_agent.presence || "Mozilla/5.0"
  req["Accept"] = "*/*"
  req["Referer"] = INSTAGRAM_BASE_URL

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 10
  http.read_timeout = 30
  # Block form guarantees the TCP connection is closed even when we raise.
  res = http.start { |conn| conn.request(req) }

  if res.is_a?(Net::HTTPRedirection) && res["location"].present? && redirect_limit.to_i.positive?
    # Location may be relative; resolve it against the current request URI.
    next_url = URI.join(uri.to_s, res["location"]).to_s
    return download_media_with_metadata(url: next_url, user_agent: user_agent, redirect_limit: redirect_limit.to_i - 1)
  end

  raise "Media download failed: HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess)

  body = res.body.to_s
  raise "Downloaded media is empty" if body.blank?

  content_type = res["content-type"].to_s.split(";").first.presence || "image/jpeg"
  # Short stable digest keyed on path + size, used to build a unique filename.
  digest = Digest::SHA256.hexdigest("#{uri.path}-#{body.bytesize}")[0, 12]
  {
    bytes: body,
    content_type: content_type,
    filename: "feed_media_#{digest}.#{extension_for_content_type(content_type: content_type)}",
    final_url: uri.to_s
  }
end
-
-
# Maps a MIME content type to a file extension by substring match, falling
# back to "bin" for anything unrecognized.
#
# @param content_type [String] e.g. "image/jpeg", "video/quicktime"
# @return [String] extension without a leading dot
def extension_for_content_type(content_type:)
  # Checked in declaration order; first substring hit wins.
  extension_by_marker = {
    "jpeg" => "jpg",
    "png" => "png",
    "webp" => "webp",
    "gif" => "gif",
    "mp4" => "mp4",
    "quicktime" => "mov"
  }
  hit = extension_by_marker.find { |marker, _ext| content_type.include?(marker) }
  hit ? hit.last : "bin"
end
-
-
# Assembles the LLM analysis payload for a post: post facts, author profile
# summary, and generation rules (optionally including prior story/post
# engagement history).
#
# @param profile [InstagramProfile-like] author profile record
# @param include_story_history [Boolean] when true, embeds recent engagement history
# @return [Hash] { post:, author_profile:, rules: }
def build_auto_engagement_post_payload(profile:, shortcode:, caption:, permalink:, include_story_history: false)
  history = include_story_history ? recent_story_and_post_history(profile: profile) : {}

  post_section = {
    shortcode: shortcode,
    caption: caption.to_s.presence,
    taken_at: nil,
    permalink: permalink,
    likes_count: nil,
    comments_count: nil,
    comments: []
  }

  author_section = {
    username: profile.username,
    display_name: profile.display_name,
    bio: profile.bio,
    can_message: profile.can_message,
    tags: profile.profile_tags.pluck(:name).sort
  }

  rules_section = {
    require_manual_review: false,
    style: "gen_z_light",
    diversity_requirement: "Avoid repeating prior story comments; generate novel phrasing.",
    engagement_history: history,
    historical_narrative_text: profile.history_narrative_text(max_chunks: 3),
    historical_narrative_chunks: profile.history_narrative_chunks(max_chunks: 6)
  }

  {
    post: post_section,
    author_profile: author_section,
    rules: rules_section
  }
end
-
-
# Runs the AI analysis pipeline for a post media item and returns the analysis
# hash. Best-effort: any failure yields {} rather than raising.
#
# @param analyzable [Object] record the Ai::Runner attaches the run to
# @param payload [Hash] structured post/profile payload
# @param bytes [String] raw media bytes (also used for the fingerprint)
# @return [Hash] the :analysis sub-hash of the run result, or {}
def analyze_for_auto_engagement!(analyzable:, payload:, bytes:, content_type:, source_url:)
  media = build_auto_engagement_media_payload(bytes: bytes, content_type: content_type, source_url: source_url)
  run = Ai::Runner.new(account: @account).analyze!(
    purpose: "post",
    analyzable: analyzable,
    payload: payload,
    media: media,
    # Fingerprint lets the runner dedupe identical media across runs.
    media_fingerprint: Digest::SHA256.hexdigest(bytes)
  )

  run.dig(:result, :analysis).is_a?(Hash) ? run.dig(:result, :analysis) : {}
rescue StandardError
  # Analysis is optional enrichment; swallow errors and return empty.
  {}
end
-
-
# Builds the media sub-payload for AI analysis. A base64 data URL is embedded
# only for payloads up to 2 MiB to keep request sizes bounded.
#
# @param bytes [String] raw media bytes
# @param content_type [String] MIME type of the media
# @param source_url [String] original URL the media came from
# @return [Hash] media payload (with :image_data_url when small enough)
def build_auto_engagement_media_payload(bytes:, content_type:, source_url:)
  media = {
    type: "image",
    content_type: content_type,
    bytes: bytes,
    url: source_url.to_s
  }
  # Inline a data URL only when the payload stays within the 2 MiB budget.
  inline_limit = 2 * 1024 * 1024
  return media if bytes.bytesize > inline_limit

  media.merge(image_data_url: "data:#{content_type};base64,#{Base64.strict_encode64(bytes)}")
end
-
-
# Produces diversity-filtered comment suggestions for a profile. Aborts (and
# logs) when the profile is not prepared for comment generation. Prefers
# suggestions already present in +analysis+; falls back to generating fresh
# ones. Both paths pass through ensure_story_comment_diversity.
#
# @return [Array<String>] candidate comments ([] when blocked)
def generate_comment_suggestions_from_analysis!(profile:, payload:, analysis:)
  preparation = ensure_profile_comment_generation_readiness(profile: profile)
  # Preparation summaries may use symbol or string keys; accept either.
  unless ActiveModel::Type::Boolean.new.cast(preparation[:ready_for_comment_generation] || preparation["ready_for_comment_generation"])
    log_automation_event(
      task_name: "comment_generation_blocked_profile_preparation",
      severity: "warn",
      details: {
        profile_id: profile&.id,
        username: profile&.username,
        reason_code: preparation[:reason_code] || preparation["reason_code"],
        reason: preparation[:reason] || preparation["reason"]
      }
    )
    return []
  end

  suggestions = Array(analysis["comment_suggestions"]).map(&:to_s).map(&:strip).reject(&:blank?).uniq
  suggestions = ensure_story_comment_diversity(profile: profile, suggestions: suggestions)
  return suggestions if suggestions.present?

  # Analysis carried no usable suggestions: generate fresh ones.
  generated = generate_google_engagement_comments!(
    payload: payload,
    image_description: analysis["image_description"],
    topics: Array(analysis["topics"]),
    author_type: analysis["author_type"].to_s
  )
  ensure_story_comment_diversity(profile: profile, suggestions: generated)
end
-
-
# Runs (and memoizes per profile id) the profile comment-preparation service.
# Successful summaries are cached for the lifetime of this object; error
# results are intentionally NOT cached, so a later call retries the service.
#
# @return [Hash] preparation summary; includes ready_for_comment_generation
#   plus reason_code/reason on failure
def ensure_profile_comment_generation_readiness(profile:)
  return { ready_for_comment_generation: false, reason_code: "profile_missing", reason: "Profile missing." } unless profile

  @profile_comment_preparation_cache ||= {}
  cached = @profile_comment_preparation_cache[profile.id]
  return cached if cached.is_a?(Hash)

  summary = Ai::ProfileCommentPreparationService.new(
    account: @account,
    profile: profile,
    posts_limit: 10,
    comments_limit: 12
  ).prepare!
  # Assignment is both the cache write and the return value.
  @profile_comment_preparation_cache[profile.id] = summary.is_a?(Hash) ? summary : {}
rescue StandardError => e
  {
    ready_for_comment_generation: false,
    reason_code: "profile_preparation_error",
    reason: e.message.to_s,
    error_class: e.class.name
  }
end
-
-
# Summarizes a profile's recent engagement: up to 12 story-related events and
# up to 8 analyzed posts, compacted to the fields the LLM payload needs.
#
# @param profile [InstagramProfile-like] record with events/posts associations
# @return [Hash] { prior_story_items: Array<Hash>, prior_post_items: Array<Hash> }
def recent_story_and_post_history(profile:)
  story_items = profile.instagram_profile_events
    .where(kind: [ "story_analyzed", "story_reply_sent", "story_comment_posted_via_feed" ])
    .order(detected_at: :desc, id: :desc)
    .limit(12)
    .map do |event|
      m = event.metadata.is_a?(Hash) ? event.metadata : {}
      # .presence + .compact drops blank fields from each item.
      {
        kind: event.kind,
        story_id: m["story_id"].to_s.presence,
        image_description: m["ai_image_description"].to_s.presence,
        sent_comment: m["ai_reply_text"].to_s.presence || m["comment_text"].to_s.presence
      }.compact
    end

  post_items = profile.instagram_profile_posts.recent_first.limit(8).map do |p|
    a = p.analysis.is_a?(Hash) ? p.analysis : {}
    {
      shortcode: p.shortcode,
      image_description: a["image_description"].to_s.presence,
      topics: Array(a["topics"]).first(5)
    }.compact
  end

  {
    prior_story_items: story_items,
    prior_post_items: post_items
  }
end
-
-
# Filters/ranks candidate comments against the profile's recently sent
# replies so we do not repeat ourselves. Candidates with max similarity
# >= 0.72 to any past reply are dropped; if that drops everything, the full
# list is returned ranked least-similar first.
#
# @return [Array<String>] diversified candidates ([] when none given)
def ensure_story_comment_diversity(profile:, suggestions:)
  candidates = Array(suggestions).map(&:to_s).map(&:strip).reject(&:blank?).uniq
  return [] if candidates.empty?

  # Last 40 sent reply texts form the comparison corpus.
  history = profile.instagram_profile_events
    .where(kind: [ "story_reply_sent", "story_comment_posted_via_feed" ])
    .order(detected_at: :desc, id: :desc)
    .limit(40)
    .map do |event|
      m = event.metadata.is_a?(Hash) ? event.metadata : {}
      m["ai_reply_text"].to_s.presence || m["comment_text"].to_s.presence
    end
    .compact

  return candidates if history.empty?

  # Sort ascending by worst-case similarity: most novel candidates first.
  ranked = candidates.sort_by do |candidate|
    history.map { |past| text_similarity_score(candidate, past) }.max.to_f
  end

  unique = ranked.select { |candidate| history.all? { |past| text_similarity_score(candidate, past) < 0.72 } }
  unique.present? ? unique : ranked
end
-
-
# Checks the last 250 story_reply_sent events for a match against any of four
# identifiers (in priority order): story id / external id, story ref,
# normalized permalink, normalized media key.
#
# @return [Hash] { found:, matched_by:, matched_external_id: }
def story_already_replied?(profile:, story_id:, story_ref:, story_url:, media_url:)
  sid = story_id.to_s.strip
  sref = story_ref.to_s.strip
  surl = normalize_story_permalink(story_url)
  mkey = normalize_story_media_key(media_url)

  profile.instagram_profile_events
    .where(kind: "story_reply_sent")
    .order(detected_at: :desc, id: :desc)
    .limit(250)
    .each do |event|
      metadata = event.metadata.is_a?(Hash) ? event.metadata : {}
      event_sid = metadata["story_id"].to_s.strip
      event_sref = metadata["story_ref"].to_s.strip
      event_surl = normalize_story_permalink(metadata["story_url"])
      event_mkey = normalize_story_media_key(metadata["media_url"])

      # story_id also matches the event's synthetic external_id form.
      if sid.present? && (event_sid == sid || event.external_id.to_s == "story_reply_sent:#{sid}")
        return { found: true, matched_by: "story_id", matched_external_id: event.external_id.to_s }
      end
      if sref.present? && event_sref.present? && event_sref == sref
        return { found: true, matched_by: "story_ref", matched_external_id: event.external_id.to_s }
      end
      if surl.present? && event_surl.present? && event_surl == surl
        return { found: true, matched_by: "story_url", matched_external_id: event.external_id.to_s }
      end
      if mkey.present? && event_mkey.present? && event_mkey == mkey
        return { found: true, matched_by: "media_url", matched_external_id: event.external_id.to_s }
      end
    end

  { found: false, matched_by: nil, matched_external_id: nil }
end
-
-
# Reduces a story permalink to a comparable key: its URL path without the
# trailing slash. Non-story URLs (and blanks) normalize to "".
#
# @param url [String, nil] raw story URL
# @return [String] normalized path, or ""
def normalize_story_permalink(url)
  value = url.to_s.strip
  return "" if value.blank?

  path =
    begin
      URI.parse(value).path.to_s
    rescue StandardError
      # Unparseable input: compare against the raw string instead.
      value
    end

  return "" unless path.include?("/stories/")

  path.sub(%r{/\z}, "")
end
-
-
# Reduces a media URL to "host+path" for dedupe comparisons, ignoring query
# strings (CDN URLs carry volatile signatures there).
#
# @param url [String, nil] raw media URL
# @return [String] "host/path" key, raw value if unparseable, "" when unusable
def normalize_story_media_key(url)
  value = url.to_s.strip
  return "" if value.blank?

  begin
    parsed = URI.parse(value)
    host = parsed.host.to_s
    path = parsed.path.to_s
    return "" if host.blank? || path.blank?

    "#{host}#{path}"
  rescue StandardError
    # Fall back to the raw string so equal inputs still compare equal.
    value
  end
end
-
-
# Word-overlap similarity between two strings: |intersection| divided by the
# larger unique-token count. Case-insensitive, alphanumeric tokens only.
#
# @return [Float] 0.0 (no overlap or either side empty) .. 1.0 (subset match)
def text_similarity_score(a, b)
  tokenize = ->(text) { text.to_s.downcase.scan(/[a-z0-9]+/).uniq }
  left = tokenize.call(a)
  right = tokenize.call(b)
  return 0.0 if left.empty? || right.empty?

  shared = (left & right).length
  shared.fdiv([ left.length, right.length ].max)
end
-
-
# Posts a comment on a post through the web UI: open /p/<shortcode>/, find the
# comment textbox, type, click the post button. Snapshots are captured at open
# and submit. Never raises; any error reports false.
#
# @return [Boolean] true when the post button click succeeded
def comment_on_post_via_ui!(driver:, shortcode:, comment_text:)
  driver.navigate.to("#{INSTAGRAM_BASE_URL}/p/#{shortcode}/")
  wait_for(driver, css: "body", timeout: 12)
  dismiss_common_overlays!(driver)
  capture_task_html(driver: driver, task_name: "auto_engage_post_opened", status: "ok", meta: { shortcode: shortcode })

  field = wait_for_comment_textbox(driver: driver)
  return false unless field

  focus_and_type(driver: driver, field: field, text: comment_text)
  posted = click_comment_post_button(driver: driver)
  # Give the UI a moment to reflect submission before snapshotting.
  sleep(0.6)
  capture_task_html(
    driver: driver,
    task_name: "auto_engage_post_comment_submit",
    status: posted ? "ok" : "error",
    meta: { shortcode: shortcode, posted: posted }
  )
  posted
rescue StandardError
  false
end
-
-
# Replies to the currently open story through the web UI. Tries the post
# button first, then falls back to submitting with Enter. When no reply box
# exists, reports the availability reason detected from page markers.
#
# @return [Hash] { posted: Boolean, reason: String, marker_text: (optional) }
def comment_on_story_via_ui!(driver:, comment_text:)
  field = wait_for_comment_textbox(driver: driver, timeout: 12)
  if !field
    # No textbox: classify why (replies off, story unavailable, etc.).
    availability = detect_story_reply_availability(driver)
    return {
      posted: false,
      reason: availability[:reason],
      marker_text: availability[:marker_text]
    }
  end

  capture_task_html(driver: driver, task_name: "auto_engage_story_reply_box_ready", status: "ok")
  focus_and_type(driver: driver, field: field, text: comment_text)
  posted = click_comment_post_button(driver: driver)
  if posted
    return { posted: true, reason: "post_button_clicked" }
  end

  # Fallback: some layouts submit only via the Enter key.
  enter_posted = send_enter_comment(driver: driver, field: field)
  return { posted: true, reason: "submitted_with_enter" } if enter_posted

  { posted: false, reason: "submit_controls_not_found" }
rescue StandardError => e
  { posted: false, reason: "exception:#{e.class.name}" }
end
-
-
# API-first story reply path discovered from captured network traces:
-
# 1) POST /api/v1/direct_v2/create_group_thread/ with recipient_users=["<reel_user_id>"]
-
# 2) POST /api/v1/direct_v2/threads/broadcast/reel_share/ with media_id="<story_id>_<reel_user_id>", reel_id, thread_id, text
-
# API-first story reply path discovered from captured network traces:
# 1) POST /api/v1/direct_v2/create_group_thread/ with recipient_users=["<reel_user_id>"]
# 2) POST /api/v1/direct_v2/threads/broadcast/reel_share/ with media_id="<story_id>_<reel_user_id>", reel_id, thread_id, text
#
# Each precondition failure short-circuits with a machine-readable :reason.
# Never raises; exceptions map to reason "api_exception:<class>".
#
# @return [Hash] { posted:, method: "api", reason:, ... api_* diagnostics }
def comment_on_story_via_api!(story_id:, story_username:, comment_text:)
  text = comment_text.to_s.strip
  return { posted: false, method: "api", reason: "blank_comment_text" } if text.blank?

  # Story ids must be purely numeric for the media_id composite.
  sid = story_id.to_s.strip.gsub(/[^0-9]/, "")
  return { posted: false, method: "api", reason: "missing_story_id" } if sid.blank?

  username = normalize_username(story_username)
  return { posted: false, method: "api", reason: "missing_story_username" } if username.blank?

  user_id = story_user_id_for(username: username)
  return { posted: false, method: "api", reason: "missing_story_user_id" } if user_id.blank?

  thread_id = direct_thread_id_for_user(user_id: user_id)
  return { posted: false, method: "api", reason: "missing_thread_id" } if thread_id.blank?

  payload = {
    action: "send_item",
    client_context: story_api_client_context,
    media_id: "#{sid}_#{user_id}",
    reel_id: user_id,
    text: text,
    thread_id: thread_id
  }

  body = ig_api_post_form_json(
    path: "/api/v1/direct_v2/threads/broadcast/reel_share/",
    referer: "#{INSTAGRAM_BASE_URL}/stories/#{username}/#{sid}/",
    form: payload
  )
  return { posted: false, method: "api", reason: "empty_api_response" } unless body.is_a?(Hash)

  status = body["status"].to_s
  if status == "ok"
    return {
      posted: true,
      method: "api",
      reason: "reel_share_sent",
      api_status: status,
      api_thread_id: body.dig("payload", "thread_id").to_s.presence,
      api_item_id: body.dig("payload", "item_id").to_s.presence
    }
  end

  # Non-ok response: surface the most specific message the API provided.
  {
    posted: false,
    method: "api",
    reason: body["message"].to_s.presence || body.dig("payload", "message").to_s.presence || body["error_type"].to_s.presence || "api_status_#{status.presence || 'unknown'}",
    api_status: status.presence || "unknown",
    api_http_status: body["_http_status"],
    api_error_code: body.dig("payload", "error_code").to_s.presence || body["error_code"].to_s.presence
  }
rescue StandardError => e
  { posted: false, method: "api", reason: "api_exception:#{e.class.name}" }
end
-
-
# Resolves an Instagram numeric user id for a username via the web profile
# info endpoint, memoized per normalized username for this object's lifetime.
#
# @return [String] user id, or "" when unresolvable or on any error
def story_user_id_for(username:)
  @story_user_id_cache ||= {}
  uname = normalize_username(username)
  return "" if uname.blank?
  cached = @story_user_id_cache[uname].to_s
  return cached if cached.present?

  web_info = fetch_web_profile_info(uname)
  user = web_info.is_a?(Hash) ? web_info.dig("data", "user") : nil
  uid = user.is_a?(Hash) ? user["id"].to_s.strip : ""
  # Only cache hits; misses stay retryable on the next call.
  @story_user_id_cache[uname] = uid if uid.present?
  uid
rescue StandardError
  ""
end
-
-
# Convenience wrapper: resolves (or creates, using the cache) the direct
# thread id for a user, returning "" on any failure.
#
# @return [String] thread id or ""
def direct_thread_id_for_user(user_id:)
  result = create_direct_thread_for_user(user_id: user_id, use_cache: true)
  result[:thread_id].to_s
rescue StandardError
  ""
end
-
-
# Creates (or fetches from the per-object cache) a direct thread with the
# given user via the private create_group_thread endpoint. The thread id is
# looked up across the response shapes observed in traces.
#
# @param use_cache [Boolean] when true, a previously created id is reused
# @return [Hash] { thread_id:, reason:, ... api_* diagnostics }; thread_id ""
#   on every failure path
def create_direct_thread_for_user(user_id:, use_cache: true)
  @story_reply_thread_cache ||= {}
  uid = user_id.to_s.strip
  return { thread_id: "", reason: "blank_user_id" } if uid.blank?

  if use_cache
    cached = @story_reply_thread_cache[uid].to_s
    return { thread_id: cached, reason: "cache_hit" } if cached.present?
  end

  # recipient_users must be a JSON-encoded array of id strings.
  body = ig_api_post_form_json(
    path: "/api/v1/direct_v2/create_group_thread/",
    referer: "#{INSTAGRAM_BASE_URL}/direct/new/",
    form: { recipient_users: [ uid ].to_json }
  )
  return { thread_id: "", reason: "empty_api_response" } unless body.is_a?(Hash)

  # The id appears at different paths depending on API version.
  thread_id =
    body["thread_id"].to_s.presence ||
    body.dig("thread", "thread_id").to_s.presence ||
    body.dig("thread", "id").to_s.presence

  if thread_id.present?
    @story_reply_thread_cache[uid] = thread_id
    return {
      thread_id: thread_id,
      reason: "thread_created",
      api_status: body["status"].to_s.presence || "ok",
      api_http_status: body["_http_status"]
    }
  end

  {
    thread_id: "",
    reason: body["message"].to_s.presence || body["error_type"].to_s.presence || "missing_thread_id",
    api_status: body["status"].to_s.presence || "unknown",
    api_http_status: body["_http_status"],
    api_error_code: body["error_code"].to_s.presence || body.dig("payload", "error_code").to_s.presence
  }
rescue StandardError => e
  { thread_id: "", reason: "api_exception:#{e.class.name}" }
end
-
-
# Generates a client_context token for direct-message API calls: current
# epoch milliseconds concatenated with a random 7-digit suffix.
#
# @return [String] numeric-looking token
def story_api_client_context
  millis = (Time.current.to_f * 1000).to_i
  suffix = rand(1_000_000..9_999_999)
  "#{millis}#{suffix}"
end
-
-
# POSTs a form-encoded request to an Instagram private API endpoint using the
# account's stored cookies/UA, mimicking the browser's XHR headers, and parses
# the JSON response. Injects "_http_status" into the returned hash.
#
# @param path [String] absolute URL or path relative to INSTAGRAM_BASE_URL
# @return [Hash, nil] parsed JSON body, or nil on non-JSON response / any error
def ig_api_post_form_json(path:, referer:, form:)
  uri = URI.parse(path.to_s.start_with?("http") ? path.to_s : "#{INSTAGRAM_BASE_URL}#{path}")

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 10
  http.read_timeout = 20

  req = Net::HTTP::Post.new(uri.request_uri)
  req["User-Agent"] = @account.user_agent.presence || "Mozilla/5.0"
  req["Accept"] = "application/json, text/plain, */*"
  req["Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8"
  req["X-Requested-With"] = "XMLHttpRequest"
  # Fallback is the well-known public web app id.
  req["X-IG-App-ID"] = (@account.auth_snapshot.dig("ig_app_id").presence || "936619743392459")
  req["Referer"] = referer.to_s

  # CSRF token comes from the stored session cookies.
  csrf = @account.cookies.find { |c| c["name"].to_s == "csrftoken" }&.dig("value").to_s
  req["X-CSRFToken"] = csrf if csrf.present?
  req["Cookie"] = cookie_header_for(@account.cookies)
  req.set_form_data(form.transform_values { |v| v.to_s })

  res = http.request(req)
  return nil unless res["content-type"].to_s.include?("json")

  body = JSON.parse(res.body.to_s)
  # Attach the HTTP status so callers can report it alongside API errors.
  body["_http_status"] = res.code.to_i
  body
rescue StandardError
  # Deliberate best-effort: callers treat nil as "empty_api_response".
  nil
end
-
-
# Classifies why no story reply box is present by scanning visible leaf-node
# text for known marker phrases. Returns "replies_not_allowed" or
# "reply_unavailable" with the matched marker, else "reply_box_not_found".
#
# @return [Hash] { reason: String, marker_text: String }
def detect_story_reply_availability(driver)
  payload = driver.execute_script(<<~JS)
    const out = { reason: "reply_box_not_found", marker_text: "" };
    const norm = (value) => (value || "").toString().replace(/\\s+/g, " ").trim().toLowerCase();
    const texts = Array.from(document.querySelectorAll("body *"))
      .filter((el) => {
        if (!el) return false;
        if (el.children && el.children.length > 0) return false;
        const r = el.getBoundingClientRect();
        return r.width > 3 && r.height > 3;
      })
      .map((el) => norm(el.innerText || el.textContent))
      .filter((t) => t.length > 0 && t.length < 140);

    const joined = texts.join(" | ");
    const matchAny = (patterns) => patterns.find((p) => joined.includes(p));

    const repliesNotAllowed = matchAny([
      "replies aren't available",
      "replies are turned off",
      "replies are off",
      "can't reply to this story",
      "you can't reply to this story",
      "reply unavailable"
    ]);
    if (repliesNotAllowed) {
      out.reason = "replies_not_allowed";
      out.marker_text = repliesNotAllowed;
      return out;
    }

    const unavailable = matchAny([
      "story unavailable",
      "this story is unavailable",
      "content unavailable",
      "not available right now",
      "unavailable"
    ]);
    if (unavailable) {
      out.reason = "reply_unavailable";
      out.marker_text = unavailable;
      return out;
    }

    return out;
  JS

  return { reason: "reply_box_not_found", marker_text: "" } unless payload.is_a?(Hash)

  {
    reason: payload["reason"].to_s.presence || "reply_box_not_found",
    marker_text: payload["marker_text"].to_s
  }
rescue StandardError
  { reason: "reply_box_not_found", marker_text: "" }
end
-
-
# Maps a story-reply failure reason to a { reason_code:, status: } pair for
# display/recording. The explicit +reason:+ keyword takes precedence; when it
# is nil OR blank, the reason is taken from +comment_result+ (a Hash-like with
# a :reason key; nil tolerated).
#
# Fix vs. previous version: a blank-but-non-nil reason (e.g. "") previously
# skipped the comment_result fallback because `||=` only fires on nil; blank
# strings now fall back as intended.
#
# @return [Hash] { reason_code: String, status: String }
def story_reply_skip_status_for(comment_result = nil, reason: nil)
  code = reason.to_s
  # Blank explicit reason -> fall back to the comment result's reason.
  code = comment_result.to_h[:reason].to_s if code.strip.empty?

  case code
  when "api_can_reply_false"
    { reason_code: "api_can_reply_false", status: "Replies not allowed (API)" }
  when "reply_box_not_found"
    { reason_code: "reply_box_not_found", status: "Reply box not found" }
  when "replies_not_allowed"
    { reason_code: "replies_not_allowed", status: "Replies not allowed" }
  when "reply_unavailable"
    { reason_code: "reply_unavailable", status: "Unavailable" }
  when "reply_precheck_error"
    { reason_code: "reply_precheck_error", status: "Unavailable" }
  else
    # Unknown reasons collapse into the generic submit-failed bucket.
    { reason_code: "comment_submit_failed", status: "Unavailable" }
  end
end
-
-
# Determines via the private API whether a story can be replied to, based on
# the resolved story item's can_reply flag.
#
# Fix vs. previous version: the rescue bound an exception variable (`=> e`)
# that was never used; the binding is removed. Behavior is unchanged.
#
# @return [Hash] { known:, reply_possible:, reason_code:, status: } — known is
#   false when the item or its can_reply flag cannot be resolved
def story_reply_capability_from_api(username:, story_id:)
  item = resolve_story_item_via_api(username: username, story_id: story_id)
  return { known: false, reply_possible: nil, reason_code: "api_story_not_found", status: "Unknown" } unless item.is_a?(Hash)

  can_reply = item[:can_reply]
  # nil means the API omitted the flag; distinguish from an explicit false.
  return { known: false, reply_possible: nil, reason_code: "api_can_reply_missing", status: "Unknown" } if can_reply.nil?

  if can_reply
    { known: true, reply_possible: true, reason_code: nil, status: "Reply available (API)" }
  else
    { known: true, reply_possible: false, reason_code: "api_can_reply_false", status: "Replies not allowed (API)" }
  end
rescue StandardError
  { known: false, reply_possible: nil, reason_code: "api_capability_error", status: "Unknown" }
end
-
-
# Reports whether the API-resolved story item carries an external-profile-link
# indicator (mention/link sticker pointing at another profile), with the
# indicator reason and target list when present.
#
# @param cache [Hash, nil] optional shared cache forwarded to the resolver
# @return [Hash] known/has_external_profile_link/reason_code/linked_username/
#   linked_profile_url/marker_text/linked_targets (username/url always "" here;
#   only the reason and targets are derivable from the API item)
def story_external_profile_link_context_from_api(username:, story_id:, cache: nil)
  item = resolve_story_item_via_api(username: username, story_id: story_id, cache: cache)
  return { known: false, has_external_profile_link: false, reason_code: "api_story_not_found", linked_username: "", linked_profile_url: "", marker_text: "", linked_targets: [] } unless item.is_a?(Hash)

  has_external = ActiveModel::Type::Boolean.new.cast(item[:api_has_external_profile_indicator])
  return { known: true, has_external_profile_link: false, reason_code: nil, linked_username: "", linked_profile_url: "", marker_text: "", linked_targets: [] } unless has_external

  reason = item[:api_external_profile_reason].to_s.presence || "api_external_profile_indicator"
  targets = Array(item[:api_external_profile_targets]).map(&:to_s).map(&:strip).reject(&:blank?).uniq
  {
    known: true,
    has_external_profile_link: true,
    reason_code: reason,
    linked_username: "",
    linked_profile_url: "",
    marker_text: reason,
    linked_targets: targets
  }
rescue StandardError
  { known: false, has_external_profile_link: false, reason_code: "api_external_context_error", linked_username: "", linked_profile_url: "", marker_text: "", linked_targets: [] }
end
-
-
# UI-side reply capability check: a short (2s) wait for the comment textbox;
# when absent, the page markers are classified into a skip status.
#
# @return [Hash] { reply_possible:, reason_code:, status:, marker_text:,
#   submission_reason: }
def check_story_reply_capability(driver:)
  # Short timeout on purpose: this is a precheck, not the reply attempt.
  field = wait_for_comment_textbox(driver: driver, timeout: 2)
  return { reply_possible: true, reason_code: nil, status: "Reply available", marker_text: "", submission_reason: "reply_box_found" } if field

  availability = detect_story_reply_availability(driver)
  status = story_reply_skip_status_for(reason: availability[:reason])
  {
    reply_possible: false,
    reason_code: status[:reason_code],
    status: status[:status],
    marker_text: availability[:marker_text].to_s,
    submission_reason: availability[:reason].to_s
  }
rescue StandardError => e
  {
    reply_possible: false,
    reason_code: "reply_precheck_error",
    status: "Unavailable",
    marker_text: "",
    submission_reason: "exception:#{e.class.name}"
  }
end
-
-
# Clicks a story quick-reaction/like control via injected JavaScript. Visible
# buttons in the lower ~55% of the viewport are scored by their text/aria
# labels ("quick reaction" > "reaction" > "react" > "like" > "heart" > emoji)
# and the top scorer is clicked.
#
# @return [Hash] { reacted: Boolean, reason: String, marker_text: String }
def react_to_story_if_available!(driver:)
  payload = driver.execute_script(<<~JS)
    const out = { reacted: false, reason: "reaction_controls_not_found", marker_text: "" };
    const norm = (value) => (value || "").toString().replace(/\\s+/g, " ").trim().toLowerCase();
    const isVisible = (el) => {
      if (!el) return false;
      const s = window.getComputedStyle(el);
      if (!s || s.display === "none" || s.visibility === "hidden" || s.opacity === "0") return false;
      const r = el.getBoundingClientRect();
      if (r.width < 4 || r.height < 4) return false;
      return r.bottom > 0 && r.top < window.innerHeight;
    };

    const candidates = Array.from(document.querySelectorAll("button, [role='button']"))
      .filter((el) => {
        if (!isVisible(el)) return false;
        const r = el.getBoundingClientRect();
        return r.top >= Math.max(0, window.innerHeight * 0.45);
      });

    const scoreFor = (el) => {
      const text = norm(el.innerText || el.textContent);
      const aria = norm(el.getAttribute && el.getAttribute("aria-label"));
      const title = norm(el.getAttribute && el.getAttribute("title"));
      const all = `${text} | ${aria} | ${title}`;
      if (all.includes("quick reaction")) return 100;
      if (all.includes("reaction")) return 95;
      if (all.includes("react")) return 90;
      if (all.includes("like")) return 75;
      if (all.includes("heart")) return 70;
      if (/[❤️❤🔥😍😂👏😢😮]/.test(text)) return 60;
      return 0;
    };

    const sorted = candidates
      .map((el) => ({ el, score: scoreFor(el) }))
      .filter((entry) => entry.score > 0)
      .sort((a, b) => b.score - a.score);

    const chosen = sorted[0];
    if (!chosen || !chosen.el) return out;

    const marker = norm(chosen.el.innerText || chosen.el.textContent) || norm(chosen.el.getAttribute && chosen.el.getAttribute("aria-label")) || "reaction_button";
    try {
      chosen.el.click();
      out.reacted = true;
      out.reason = "reaction_button_clicked";
      out.marker_text = marker;
      return out;
    } catch (e) {
      out.reason = "reaction_click_failed";
      out.marker_text = marker;
      return out;
    }
  JS

  return { reacted: false, reason: "reaction_detection_error", marker_text: "" } unless payload.is_a?(Hash)

  {
    reacted: ActiveModel::Type::Boolean.new.cast(payload["reacted"]),
    reason: payload["reason"].to_s.presence || "reaction_controls_not_found",
    marker_text: payload["marker_text"].to_s
  }
rescue StandardError => e
  { reacted: false, reason: "reaction_exception:#{e.class.name}", marker_text: "" }
end
-
-
# True when a previous DM attempt marked this profile "unavailable" and the
# retry window has not elapsed yet, so callers should skip messaging it.
def dm_interaction_retry_pending?(profile)
  return false unless profile
  return false unless profile.dm_interaction_state.to_s == "unavailable"

  deadline = profile.dm_interaction_retry_after_at
  deadline.present? && deadline > Time.current
end
-
-
-
-
# True when a previous story interaction marked this profile "unavailable"
# and its retry window is still open (callers should skip the profile).
def profile_interaction_retry_pending?(profile)
  return false unless profile
  return false unless profile.story_interaction_state.to_s == "unavailable"

  deadline = profile.story_interaction_retry_after_at
  deadline.present? && deadline > Time.current
end
-
-
# Persists the outcome of a story-interaction attempt on the profile record.
# When `reaction_available` is nil the stored flag is left unchanged;
# otherwise it is boolean-cast. Persistence failures are swallowed — this is
# bookkeeping and must never abort the interaction flow.
def mark_profile_interaction_state!(profile:, state:, reason:, reaction_available:, retry_after_at: nil)
  return unless profile

  reaction_flag =
    if reaction_available.nil?
      profile.story_reaction_available
    else
      ActiveModel::Type::Boolean.new.cast(reaction_available)
    end

  attributes = {
    story_interaction_state: state.to_s.presence,
    story_interaction_reason: reason.to_s.presence,
    story_interaction_checked_at: Time.current,
    story_interaction_retry_after_at: retry_after_at,
    story_reaction_available: reaction_flag
  }
  profile.update!(attributes)
rescue StandardError
  nil
end
-
-
# Records the reply text that was posted for a downloaded story event by
# merging it into the event's metadata hash under "reply_comment".
# No-ops when either the event or the comment text is blank.
def attach_reply_comment_to_downloaded_event!(downloaded_event:, comment_text:)
  return if downloaded_event.blank?
  return if comment_text.blank?

  existing = downloaded_event.metadata
  updated = existing.is_a?(Hash) ? existing.deep_dup : {}
  updated["reply_comment"] = comment_text.to_s
  downloaded_event.update!(metadata: updated)
end
-
-
# Polls (up to `timeout` seconds) for a visible comment/reply input:
# first a matching <textarea>, then a contenteditable div[role='textbox'].
# Returns the Selenium element, or nil on timeout.
def wait_for_comment_textbox(driver:, timeout: 10)
  Selenium::WebDriver::Wait.new(timeout: timeout).until do
    # `x.displayed? rescue false` tolerates elements going stale mid-check.
    el =
      driver.find_elements(css: "textarea[aria-label*='comment'], textarea[aria-label*='Comment'], textarea[placeholder*='comment'], textarea[placeholder*='Comment'], textarea[placeholder*='reply'], textarea[placeholder*='Reply']").find { |x| x.displayed? rescue false } ||
      driver.find_elements(css: "div[role='textbox'][contenteditable='true']").find { |x| x.displayed? rescue false }
    # `break el` makes the found element the value of the whole `until` call.
    break el if el
  end
rescue Selenium::WebDriver::Error::TimeoutError
  nil
end
-
-
# Scrolls the input into view, focuses it, and types `text`.
# Contenteditable divs are focused via JS and typed into as-is; real
# <textarea>/<input> fields are cleared first (select-all + backspace).
# Scroll and click are best-effort and may silently fail.
def focus_and_type(driver:, field:, text:)
  begin
    driver.execute_script("arguments[0].scrollIntoView({block:'center'});", field)
  rescue StandardError
    nil
  end

  begin
    field.click
  rescue StandardError
    nil
  end

  content_editable_div = field.tag_name.to_s.downcase == "div"
  if content_editable_div
    driver.execute_script("arguments[0].focus();", field)
    field.send_keys(text.to_s)
  else
    # Clear any prefilled text before typing the new content.
    field.send_keys([:control, "a"])
    field.send_keys(:backspace)
    field.send_keys(text.to_s)
  end
end
-
-
# Finds and clicks the comment submit control. Tries exact "Post"/"Reply"
# labels first, then case-insensitive substring matches, accepting both
# <button> and div[role='button']. Falls back to a JS click when the native
# action chain fails. Returns true when a click was attempted, false when
# no enabled button was found or an error occurred.
def click_comment_post_button(driver:)
  button =
    driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][normalize-space()='Post']").find { |el| element_enabled?(el) } ||
    driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][normalize-space()='Reply']").find { |el| element_enabled?(el) } ||
    driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][contains(translate(normalize-space(.), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'post')]").find { |el| element_enabled?(el) } ||
    driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][contains(translate(normalize-space(.), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'reply')]").find { |el| element_enabled?(el) }
  return false unless button

  begin
    driver.action.move_to(button).click.perform
  rescue StandardError
    # Overlapping elements can break the native click; JS click bypasses hit-testing.
    js_click(driver, button)
  end
  true
rescue StandardError
  false
end
-
-
# Submits a composed comment by clicking the input and pressing Enter.
# Returns true on success, false if the action chain raises.
def send_enter_comment(driver:, field:)
  driver.action.click(field).send_keys(:enter).perform
  true
rescue StandardError
  false
end
-
-
# Best-effort pause of the currently playing story so it does not auto-advance
# while we interact with it: pauses/zero-rates all <video> elements and stalls
# progress-bar animations/transitions. Swallows all errors.
def freeze_story_progress!(driver)
  driver.execute_script(<<~JS)
    const pauseStory = () => {
      try {
        document.querySelectorAll("video").forEach((v) => {
          try { v.pause(); } catch (e) {}
          try { v.playbackRate = 0; } catch (e) {}
        });
      } catch (e) {}

      try {
        document.querySelectorAll("*").forEach((el) => {
          if (!el || !el.style) return;
          if (el.getAttribute("role") === "progressbar" || el.className.toString().toLowerCase().includes("progress")) {
            try { el.style.animationPlayState = "paused"; } catch (e) {}
            try { el.style.transitionDuration = "999999s"; } catch (e) {}
          }
        });
      } catch (e) {}
    };

    pauseStory();
  JS
rescue StandardError
  nil
end
-
-
# Normalizes a raw story-id token to its numeric form: strips query/fragment
# and path tails, rejects placeholder tokens ("unknown", "sig", "sig:…"),
# and keeps only digits. Returns "" when nothing usable remains.
def normalize_story_id_token(value)
  raw = value.to_s.strip
  return "" if raw.blank?

  head = raw.split(/[?#]/).first.to_s.split("/").first.to_s
  return "" if head.blank?
  return "" if head.casecmp("unknown").zero? || head.casecmp("sig").zero? || head.start_with?("sig:")

  head.delete("^0-9").presence || ""
rescue StandardError
  ""
end
-
-
# Builds the canonical instagram.com story URL for a username (and, when a
# numeric id is available, a specific story). Falls back to `fallback_url`
# when no usable username exists or anything raises.
def canonical_story_url(username:, story_id:, fallback_url:)
  uname = normalize_username(username)
  sid = normalize_story_id_token(story_id)
  if uname.present?
    return "#{INSTAGRAM_BASE_URL}/stories/#{uname}/#{sid}/" if sid.present?

    return "#{INSTAGRAM_BASE_URL}/stories/#{uname}/"
  end

  fallback_url.to_s
rescue StandardError
  fallback_url.to_s
end
-
-
# Extracts a numeric story-id hint from a CDN media URL.
# First tries the base64-encoded `ig_cache_key` query parameter, then falls
# back to a "/stories/<user>/<digits>" path match. Returns "" when no hint
# can be derived; never raises.
def story_id_hint_from_media_url(url)
  candidate = url.to_s.strip
  return "" if candidate.blank?

  begin
    parsed = URI.parse(candidate)
    params = Rack::Utils.parse_query(parsed.query.to_s)
    cache_key = params["ig_cache_key"].to_s
    if cache_key.present?
      decoded = Base64.decode64(CGI.unescape(cache_key)).to_s
      digits = decoded.match(/(\d{8,})/)
      return digits[1].to_s if digits
    end
  rescue StandardError
    # Unparseable URL or query — fall through to the path-based match.
    nil
  end

  path_match = candidate.match(%r{/stories/[A-Za-z0-9._]{1,30}/(\d{8,})})
  return path_match[1].to_s if path_match

  ""
rescue StandardError
  ""
end
-
-
# Builds a compact "username:story_id" reference for the story URL.
#
# Fix: the previous `value.split("/stories/").last` returned the URL *prefix*
# for a bare ".../stories/" URL (split drops trailing empties), fabricating a
# garbage reference like "https::". We now slice the tail after the last
# "/stories/" marker so an empty tail yields "".
#
# @param url [#to_s] a browser URL
# @return [String] "username:story_id" (story_id may be empty) or "" when the
#   URL is not a story URL or carries no username segment
def current_story_reference(url)
  value = url.to_s
  marker = "/stories/"
  anchor = value.rindex(marker)
  return "" unless anchor

  tail = value[(anchor + marker.length)..].to_s
  username, story_id = tail.split("/")
  return "" if username.to_s.blank?

  "#{username}:#{story_id}"
end
-
-
# Pulls a plausible Instagram username from a profile-like URL path
# (first non-empty path segment). Returns "" for blank input, story paths,
# or segments that do not look like a valid handle (1-30 chars of
# letters/digits/dot/underscore). Unparseable URLs are treated as raw paths.
def extract_username_from_profile_like_path(url)
  raw = url.to_s
  return "" if raw.blank?

  path =
    begin
      URI.parse(raw).path.to_s
    rescue StandardError
      raw
    end

  candidate = path.split("/").reject(&:blank?).first.to_s
  return "" if candidate.blank?
  return "" if candidate.casecmp("stories").zero?

  candidate.match?(/\A[a-zA-Z0-9._]{1,30}\z/) ? candidate : ""
end
-
-
# Verifies the browser is still on the expected story; if navigation drifted,
# reloads the story URL (specific story when an id is present, otherwise the
# user's story tray), re-dismisses overlays, and captures a debug snapshot.
def ensure_story_same_or_reload!(driver:, expected_ref:, username:)
  return if expected_ref.to_s.blank?
  return if current_story_reference(driver.current_url.to_s) == expected_ref

  story_id = expected_ref.to_s.split(":")[1].to_s
  destination =
    if story_id.present?
      "/stories/#{username}/#{story_id}/"
    else
      "/stories/#{username}/"
    end

  driver.navigate.to("#{INSTAGRAM_BASE_URL}#{destination}")
  wait_for(driver, css: "body", timeout: 12)
  dismiss_common_overlays!(driver)
  capture_task_html(
    driver: driver,
    task_name: "auto_engage_story_reloaded",
    status: "ok",
    meta: { expected_ref: expected_ref, current_ref: current_story_reference(driver.current_url.to_s) }
  )
end
-
-
# Decides whether a downloaded story image is worth keeping.
# Skips empty payloads, files under 1500 bytes, tiny known dimensions, and
# small low-entropy images (likely blank placeholders). On any error the
# download is kept (skip: false) rather than dropped.
#
# Returns { skip: Boolean, reason: String|nil, entropy: Float|nil }.
def evaluate_story_image_quality(download:, media:)
  payload = download.is_a?(Hash) ? download : {}
  bytes = payload[:bytes].to_s.b
  content_type = payload[:content_type].to_s
  width = media[:width].to_i
  height = media[:height].to_i

  return { skip: true, reason: "empty_download", entropy: nil } if bytes.blank?
  return { skip: true, reason: "too_small_bytes", entropy: nil } if bytes.bytesize < 1500

  if width.positive? && height.positive? && (width < 120 || height < 120)
    return { skip: true, reason: "tiny_dimensions", entropy: nil }
  end

  entropy = bytes_entropy(bytes)

  # Heuristic: placeholder/blank assets are often very small and very low entropy.
  if content_type.start_with?("image/") && bytes.bytesize < 45_000 && entropy < 4.2
    return { skip: true, reason: "low_entropy_small_image", entropy: entropy }
  end

  { skip: false, reason: nil, entropy: entropy }
rescue StandardError
  { skip: false, reason: nil, entropy: nil }
end
-
-
# Shannon entropy of a byte string in bits per byte (0.0..8.0), rounded to
# 4 decimal places. Used as a cheap "blank placeholder?" signal for story
# media downloads. Returns 0.0 for empty input.
#
# Improvement: replaces the manual 256-slot counting array with the
# idiomatic `each_byte.tally` histogram folded via Enumerable#sum.
def bytes_entropy(bytes)
  data = bytes.to_s.b
  return 0.0 if data.empty?

  total = data.bytesize.to_f
  entropy = data.each_byte.tally.each_value.sum do |count|
    p = count / total
    -p * Math.log2(p)
  end
  entropy.round(4)
end
-
-
# Detects whether the story currently on screen is an ad.
# Primary signal: an explicit "Sponsored"/"Promoted"/etc. label in the story
# header zone (DOM scan done in-page). Secondary signal: ad markers embedded
# in the media CDN URL. Never raises.
#
# Returns a Hash with :ad_detected, :reason, :marker_text, :signal_source,
# :signal_confidence, :debug_hint.
def detect_story_ad_context(driver:, media: nil)
  payload = driver.execute_script(<<~JS)
    const out = { ad_detected: false, reason: "", marker_text: "" };
    const explicitMarkers = [
      "sponsored",
      "sponsored post",
      "sponsored content",
      "promoted",
      "paid partnership",
      "advertisement"
    ];
    const norm = (value) => (value || "").toString().replace(/\\s+/g, " ").trim().toLowerCase();
    const isVisible = (el) => {
      if (!el) return false;
      const s = window.getComputedStyle(el);
      if (!s || s.display === "none" || s.visibility === "hidden" || s.opacity === "0") return false;
      const r = el.getBoundingClientRect();
      if (r.width < 4 || r.height < 4) return false;
      return r.bottom > 0 && r.top < window.innerHeight;
    };
    const inStoryHeaderZone = (el) => {
      const r = el.getBoundingClientRect();
      return r.top >= 0 && r.top <= Math.max(240, window.innerHeight * 0.38);
    };
    const matchesExplicitMarker = (text) => {
      if (!text) return "";
      for (const m of explicitMarkers) {
        if (text === m) return m;
        if (text.startsWith(`${m} `)) return m;
        if (text.endsWith(` ${m}`)) return m;
        if (text.includes(` ${m} `)) return m;
      }
      return "";
    };
    const markerRegex = /\b(sponsored|promoted|paid partnership|advertisement)\b/;

    const path = (window.location && window.location.pathname || "").toLowerCase();
    if (!path.includes("/stories/")) return out;

    // Keep the search focused on story header text nodes to avoid false positives from unrelated controls.
    const nodes = Array.from(document.querySelectorAll("header span, header a, header [role='button'], [data-testid*='story'] span, [data-testid*='story'] a"));
    for (const node of nodes) {
      if (!isVisible(node)) continue;
      if (!inStoryHeaderZone(node)) continue;

      const text = norm(node.innerText || node.textContent);
      const aria = norm(node.getAttribute && node.getAttribute("aria-label"));
      if (text.length > 60 && aria.length > 60) continue;

      const marker = matchesExplicitMarker(text) || matchesExplicitMarker(aria);
      if (!marker) continue;

      out.ad_detected = true;
      out.reason = "header_marker_match";
      out.marker_text = text || aria || marker;
      return out;
    }

    // Backup detector: scan concise visible labels in the top story zone.
    // This catches some sponsored labels that are not rendered inside <header>.
    const topNodes = Array.from(document.querySelectorAll("span, a, div, button")).filter((node) => {
      if (!isVisible(node)) return false;
      if (!inStoryHeaderZone(node)) return false;
      const text = norm(node.innerText || node.textContent);
      if (!text || text.length > 42) return false;
      return true;
    });

    for (const node of topNodes) {
      const text = norm(node.innerText || node.textContent);
      const aria = norm(node.getAttribute && node.getAttribute("aria-label"));
      const title = norm(node.getAttribute && node.getAttribute("title"));
      const candidate = [text, aria, title].find((value) => value && markerRegex.test(value));
      if (!candidate) continue;

      out.ad_detected = true;
      out.reason = "top_zone_marker_match";
      out.marker_text = candidate;
      return out;
    }

    return out;
  JS

  # Non-Hash payloads (nil/arrays from unexpected pages) are treated as "no signal".
  return { ad_detected: false, reason: "", marker_text: "", signal_source: "", signal_confidence: "", debug_hint: "" } unless payload.is_a?(Hash)

  result = {
    ad_detected: ActiveModel::Type::Boolean.new.cast(payload["ad_detected"]),
    reason: payload["reason"].to_s,
    marker_text: payload["marker_text"].to_s,
    signal_source: "dom_header",
    signal_confidence: "high",
    debug_hint: ""
  }
  return result if result[:ad_detected]

  # DOM said "not an ad" — consult the media URL for embedded ad markers.
  media_url = media.is_a?(Hash) ? media[:url].to_s : ""
  media_hint = ad_hint_from_media_url(media_url)
  return result.merge(signal_source: "", signal_confidence: "", debug_hint: "") if media_hint.blank?

  # Only a high-confidence URL marker flips the verdict; low-confidence hints
  # are surfaced via signal_confidence/debug_hint without flagging the story.
  if media_hint[:confidence] == "high"
    {
      ad_detected: true,
      reason: "media_url_ad_marker",
      marker_text: media_hint[:marker].to_s,
      signal_source: "media_url",
      signal_confidence: media_hint[:confidence].to_s,
      debug_hint: media_hint[:marker].to_s
    }
  else
    {
      ad_detected: false,
      reason: "",
      marker_text: "",
      signal_source: "media_url",
      signal_confidence: media_hint[:confidence].to_s,
      debug_hint: media_hint[:marker].to_s
    }
  end
rescue StandardError
  { ad_detected: false, reason: "", marker_text: "", signal_source: "", signal_confidence: "", debug_hint: "" }
end
-
-
# Scans the visible story for a link to an Instagram profile other than
# `current_username` (a hint that the story reshares someone else's content).
# Mention-style links ("@user", aria "mention") are ignored. Never raises.
#
# Returns a Hash with :has_external_profile_link, :linked_username,
# :linked_profile_url, :marker_text.
def detect_story_external_profile_link_context(driver:, current_username:)
  current = normalize_username(current_username).to_s
  payload = driver.execute_script(<<~JS, current)
    const currentUsername = (arguments[0] || "").toString().trim().toLowerCase();
    const out = { has_external_profile_link: false, linked_username: "", linked_profile_url: "", marker_text: "" };
    const norm = (value) => (value || "").toString().replace(/\\s+/g, " ").trim();
    const normLower = (value) => norm(value).toLowerCase();
    const isVisible = (el) => {
      if (!el) return false;
      const s = window.getComputedStyle(el);
      if (!s || s.display === "none" || s.visibility === "hidden" || s.opacity === "0") return false;
      const r = el.getBoundingClientRect();
      if (r.width < 8 || r.height < 8) return false;
      return r.bottom > 0 && r.top < window.innerHeight;
    };
    const parseLinkedUsername = (href) => {
      try {
        const u = new URL(href, window.location.origin);
        if (!/instagram\\.com$/i.test(u.hostname)) return "";
        const segs = u.pathname.split("/").filter(Boolean);
        if (segs.length !== 1) return "";
        const candidate = (segs[0] || "").toLowerCase();
        if (!/^[a-z0-9._]{1,30}$/.test(candidate)) return "";
        return candidate;
      } catch (e) {
        return "";
      }
    };

    const candidates = Array.from(document.querySelectorAll("a[href], [role='link'][href], [role='link'][data-href]"));
    for (const el of candidates) {
      if (!isVisible(el)) continue;
      const href = (el.getAttribute("href") || el.getAttribute("data-href") || "").toString();
      if (!href) continue;
      const linked = parseLinkedUsername(href);
      if (!linked) continue;
      if (linked === currentUsername) continue;

      const text = norm(el.innerText || el.textContent);
      const aria = norm(el.getAttribute && el.getAttribute("aria-label"));
      const title = norm(el.getAttribute && el.getAttribute("title"));
      const marker = [text, aria, title].find((v) => v && v.length > 0) || linked;
      const markerLower = normLower(marker);

      // Ignore common mention-style links; they do not necessarily indicate reshared content.
      if (markerLower.startsWith("@")) continue;
      if (markerLower.includes("mention")) continue;

      out.has_external_profile_link = true;
      out.linked_username = linked;
      out.linked_profile_url = href;
      out.marker_text = marker;
      return out;
    }

    return out;
  JS

  # Only a Hash payload is trusted; anything else means "no external link found".
  return { has_external_profile_link: false, linked_username: "", linked_profile_url: "", marker_text: "" } unless payload.is_a?(Hash)

  {
    has_external_profile_link: ActiveModel::Type::Boolean.new.cast(payload["has_external_profile_link"]),
    linked_username: payload["linked_username"].to_s,
    linked_profile_url: payload["linked_profile_url"].to_s,
    marker_text: payload["marker_text"].to_s
  }
rescue StandardError
  { has_external_profile_link: false, linked_username: "", linked_profile_url: "", marker_text: "" }
end
-
-
# Looks for ad markers embedded in a media CDN URL.
# First checks raw substrings of the URL, then decodes the base64 `efg`
# query parameter and checks the decoded text. Returns a
# { marker:, confidence: } Hash ("high"/"low") or nil when no marker is
# found or the URL cannot be parsed.
def ad_hint_from_media_url(url)
  candidate = url.to_s.strip
  return nil if candidate.blank?

  lowered = candidate.downcase
  # Checked in order: first matching substring wins.
  direct_markers = {
    "_nc_ad=" => { marker: "_nc_ad_query", confidence: "low" },
    "ad_image" => { marker: "ad_image_marker", confidence: "high" },
    "ads_image" => { marker: "ads_image_marker", confidence: "high" },
    "ad_urlgen" => { marker: "ad_urlgen_marker", confidence: "high" },
    "page_instagram_web_story" => { marker: "page_instagram_web_story_marker", confidence: "low" }
  }
  direct_markers.each do |needle, hint|
    return hint if lowered.include?(needle)
  end

  parsed = URI.parse(candidate)
  raw_efg = Rack::Utils.parse_query(parsed.query.to_s)["efg"].to_s
  return nil if raw_efg.blank?

  decoded = decode_urlsafe_base64(raw_efg)
  return nil if decoded.blank?

  efg_text = decoded.downcase
  efg_markers = {
    "ad_image" => { marker: "efg_ad_image", confidence: "high" },
    "ads_image" => { marker: "efg_ads_image", confidence: "high" },
    "ad_urlgen" => { marker: "efg_ad_urlgen", confidence: "high" },
    "page_instagram_web_story" => { marker: "efg_page_instagram_web_story", confidence: "low" }
  }
  efg_markers.each do |needle, hint|
    return hint if efg_text.include?(needle)
  end

  nil
rescue StandardError
  nil
end
-
-
# Decodes URL-safe base64 ("-"/"_" alphabet), tolerating missing "=" padding.
# Returns the decoded bytes, or nil if decoding raises.
def decode_urlsafe_base64(value)
  normalized = value.to_s.tr("-_", "+/")
  remainder = normalized.length % 4
  normalized += "=" * (4 - remainder) if remainder.positive?
  Base64.decode64(normalized)
rescue StandardError
  nil
end
-
-
# Coerces arbitrary truthy tokens ("1", "true", "on", …) to a boolean via
# ActiveModel's boolean cast.
def bool(value)
  caster = ActiveModel::Type::Boolean.new
  caster.cast(value)
end
-
-
# Maps a raw cookie SameSite token to the capitalized form Selenium expects
# ("Lax"/"Strict"/"None"); Chrome's "no_restriction" also maps to "None".
# Returns nil for blank or unrecognized values.
def normalize_same_site(value)
  token = value.to_s.strip.downcase
  return nil if token.blank?

  mapping = {
    "lax" => "Lax",
    "strict" => "Strict",
    "none" => "None",
    "no_restriction" => "None"
  }
  mapping[token]
end
-
-
# Heuristically detects a logged-out Instagram page: the logged-out banner
# text, the "is_logged_in":false bootstrap flag, or a username login input.
# Best-effort — any error reads as "not logged out".
def logged_out_page?(driver)
  html = driver.page_source.to_s.downcase
  return true if html.include?("create an account or log in to instagram")
  return true if html.include?("\"is_logged_in\":false")

  driver.find_elements(css: "input[name='username']").any?
rescue StandardError
  false
end
-
-
# Best-effort dismissal of cookie/notification dialogs that can prevent the
# story tray from rendering. Clicks each known button label if a visible
# match exists; individual failures are swallowed and the scan continues.
def dismiss_common_overlays!(driver)
  labels = [
    "Allow all cookies",
    "Accept all",
    "Only allow essential cookies",
    "Not now",
    "Not Now"
  ]

  labels.each do |label|
    begin
      target = driver.find_elements(xpath: "//button[normalize-space()='#{label}']").first
      next unless target&.displayed?

      target.click
      # Give the dialog a moment to animate away before the next lookup.
      sleep(0.3)
    rescue StandardError
      next
    end
  end
end
-
-
# Clicks an element via JavaScript, bypassing Selenium's hit-testing (useful
# when overlays intercept native clicks). Scrolls it into view first.
# Returns true/false from the script (false when the element is nil or the
# click throws).
def js_click(driver, element)
  driver.execute_script(<<~JS, element)
    const el = arguments[0];
    if (!el) return false;
    try { el.scrollIntoView({ block: "center", inline: "nearest" }); } catch (e) {}
    try { el.click(); return true; } catch (e) {}
    return false;
  JS
end
-
-
# Dumps a browser storage area ("localStorage" or "sessionStorage") as an
# array of { "key" => ..., "value" => ... } hashes. Returns [] on any error.
# `storage_name.inspect` embeds the name as a quoted JS string literal.
def read_web_storage(driver, storage_name)
  script = <<~JS
    const s = window[#{storage_name.inspect}];
    const out = [];
    for (let i = 0; i < s.length; i++) {
      const k = s.key(i);
      out.push({ key: k, value: s.getItem(k) });
    }
    return out;
  JS
  # Selenium may return symbol keys depending on version; normalize to strings.
  driver.execute_script(script).map { |entry| entry.transform_keys(&:to_s) }
rescue StandardError
  []
end
-
-
# Writes key/value entries into a browser storage area ("localStorage" or
# "sessionStorage"). Accepts entries with string or symbol keys; entries
# without a key are dropped. Individual setItem failures are ignored in-page.
# Returns the number of entries attempted, or nil on error.
def write_web_storage(driver, storage_name, entries)
  # Normalize to string-keyed {"key" =>, "value" =>} hashes for the JS side.
  safe_entries = Array(entries).map do |entry|
    entry = entry.to_h
    { "key" => entry["key"] || entry[:key], "value" => entry["value"] || entry[:value] }
  end.select { |e| e["key"].present? }

  script = <<~JS
    const s = window[#{storage_name.inspect}];
    const entries = arguments[0] || [];
    for (const e of entries) {
      try { s.setItem(e.key, e.value); } catch (err) {}
    }
    return entries.length;
  JS
  driver.execute_script(script, safe_entries)
rescue StandardError
  nil
end
-
-
-
-
-
-
-
-
-
end
-
end
-
module Instagram
  class Client
    # Selenium/Chrome session plumbing for the Instagram client: driver
    # lifecycle, cookie + web-storage restore, login validation, and
    # post-run session snapshotting back onto @account.
    module BrowserAutomation
      # Boots Chrome, restores the account's stored session (cookies +
      # storage), validates authentication, yields the driver, and refreshes
      # the stored session snapshot afterwards.
      # Raises AuthenticationRequiredError when no cookies are stored or the
      # restored session is not logged in.
      def with_authenticated_driver
        if @account.cookies.blank?
          raise AuthenticationRequiredError, "No stored cookies. Use manual login or import cookies first."
        end

        with_driver do |driver|
          apply_session_bundle!(driver)
          driver.navigate.to("#{INSTAGRAM_BASE_URL}/")
          ensure_authenticated!(driver)

          result = yield(driver)
          refresh_account_snapshot!(driver)
          result
        end
      end

      # Creates a Chrome driver, yields it, and always quits it afterwards.
      def with_driver(headless: env_headless?)
        driver = Selenium::WebDriver.for(:chrome, options: chrome_options(headless: headless))
        yield(driver)
      ensure
        driver&.quit
      end

      # Builds the Chrome options used for all sessions: fixed window size,
      # sandbox/gpu flags for container environments, optional headless mode,
      # verbose logging, optional TLS-interception bypass, and a sticky
      # user agent when the account has one stored.
      def chrome_options(headless:)
        options = Selenium::WebDriver::Chrome::Options.new
        options.add_argument("--window-size=1400,1200")
        options.add_argument("--disable-notifications")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--disable-gpu")
        options.add_argument("--remote-debugging-pipe")
        options.add_argument("--no-sandbox")
        options.add_argument("--headless=new") if headless

        # Enable browser console + performance logs for debugging (captured into our task artifacts when available).
        # Note: ChromeDriver support varies by version; we guard reads in `capture_task_html`.
        options.add_option("goog:loggingPrefs", { browser: "ALL", performance: "ALL" })

        # Allow an opt-in bypass for corp TLS interception setups where the Selenium Chrome instance does not
        # trust the proxy CA. Keep this OFF by default.
        if ActiveModel::Type::Boolean.new.cast(ENV["INSTAGRAM_CHROME_IGNORE_CERT_ERRORS"])
          options.add_argument("--ignore-certificate-errors")
          options.add_argument("--ignore-ssl-errors=yes")
        end

        # Sticky sessions in headless are more reliable when we keep a consistent UA.
        if @account.user_agent.present?
          options.add_argument("--user-agent=#{@account.user_agent}")
        end

        options
      end

      # Whether the app-level config asks for headless Chrome.
      def env_headless?
        Rails.application.config.x.instagram.headless == true
      end

      # Polls once per second until a "sessionid" cookie appears (the user
      # completed the interactive login) or the timeout elapses.
      # Raises RuntimeError on timeout.
      def wait_for_manual_login!(driver:, timeout_seconds:)
        timeout_at = Time.now + timeout_seconds

        loop do
          cookie_names = driver.manage.all_cookies.map { |c| c[:name] }
          return if cookie_names.include?("sessionid")

          raise "Timed out waiting for manual Instagram login" if Time.now > timeout_at

          sleep(1)
        end
      end

      # Copies the browser's current cookies onto the account (string keys);
      # does not save the record.
      def persist_cookies!(driver)
        @account.cookies = driver.manage.all_cookies.map { |cookie| cookie.transform_keys(&:to_s) }
      end

      # Captures the full session bundle (UA, cookies, local/session storage,
      # app id) plus a diagnostic auth_snapshot onto the account.
      # Does not save the record.
      def persist_session_bundle!(driver)
        # Capture after successful 2FA and redirect to authenticated session.
        @account.user_agent = safe_driver_value(driver) { driver.execute_script("return navigator.userAgent") }

        persist_cookies!(driver)
        @account.local_storage = read_web_storage(driver, "localStorage")
        @account.session_storage = read_web_storage(driver, "sessionStorage")
        ig_app_id = detect_ig_app_id(driver)

        @account.auth_snapshot = {
          captured_at: Time.current.utc.iso8601(3),
          current_url: safe_driver_value(driver) { driver.current_url },
          page_title: safe_driver_value(driver) { driver.title },
          ig_app_id: ig_app_id,
          sessionid_present: @account.cookies.any? { |c| c["name"].to_s == "sessionid" && c["value"].to_s.present? },
          cookie_names: @account.cookies.map { |c| c["name"] }.compact.uniq.sort,
          local_storage_keys: @account.local_storage.map { |e| e["key"] }.compact.uniq.sort,
          session_storage_keys: @account.session_storage.map { |e| e["key"] }.compact.uniq.sort
        }
      end

      # Re-captures the session bundle and saves the account when anything
      # changed. Failures are logged and swallowed — snapshotting must not
      # break the calling flow.
      def refresh_account_snapshot!(driver)
        persist_session_bundle!(driver)
        @account.save! if @account.changed?
      rescue StandardError => e
        Rails.logger.warn("Instagram snapshot refresh skipped: #{e.class}: #{e.message}")
      end

      # Restores cookies and web storage from the account into the browser.
      def apply_session_bundle!(driver)
        # Need a base navigation first so Chrome is on the correct domain for cookies + storage.
        driver.navigate.to(INSTAGRAM_BASE_URL)

        apply_cookies!(driver)
        write_web_storage(driver, "localStorage", @account.local_storage)
        write_web_storage(driver, "sessionStorage", @account.session_storage)
      end

      # Finds the IG web app id from several in-page locations; falls back to
      # the previously captured snapshot value, then the well-known default.
      def detect_ig_app_id(driver)
        script = <<~JS
          const candidates = []
          const push = (value) => {
            if (value === null || typeof value === "undefined") return
            const text = String(value)
            const match = text.match(/\\d{8,}/)
            if (match) candidates.push(match[0])
          }

          try { push(document.documentElement?.getAttribute("data-app-id")) } catch (e) {}
          try { push(window._sharedData?.config?.app_id) } catch (e) {}
          try { push(window.__initialData?.config?.app_id) } catch (e) {}
          try { push(window.localStorage?.getItem("ig_app_id")) } catch (e) {}
          try { push(window.localStorage?.getItem("app_id")) } catch (e) {}
          try { push(window.sessionStorage?.getItem("ig_app_id")) } catch (e) {}

          return candidates[0] || null
        JS

        detected = safe_driver_value(driver) { driver.execute_script(script) }.to_s.strip
        return detected if detected.present?

        @account.auth_snapshot.dig("ig_app_id").to_s.presence || "936619743392459"
      rescue StandardError
        @account.auth_snapshot.dig("ig_app_id").to_s.presence || "936619743392459"
      end

      # Loads each stored cookie into the browser, sanitizing attributes to
      # what Selenium accepts and retrying without domain/same_site when
      # Chrome rejects the full form.
      def apply_cookies!(driver)
        driver.navigate.to(INSTAGRAM_BASE_URL)

        @account.cookies.each do |cookie|
          next if cookie["name"].blank? || cookie["value"].blank?

          sanitized_cookie = {
            name: cookie["name"],
            value: cookie["value"],
            path: cookie["path"] || "/",
            secure: bool(cookie["secure"]),
            http_only: bool(cookie["httpOnly"])
          }

          sanitized_cookie[:domain] = cookie["domain"] if cookie["domain"].present?
          sanitized_cookie[:same_site] = normalize_same_site(cookie["sameSite"])

          # Exports use either "expiry" (Selenium) or "expires" (CDP/browser) for the same field.
          if cookie["expiry"].present?
            sanitized_cookie[:expires] = cookie["expiry"].to_i
          elsif cookie["expires"].present?
            sanitized_cookie[:expires] = cookie["expires"].to_i
          end

          driver.manage.add_cookie(sanitized_cookie)
        rescue Selenium::WebDriver::Error::UnableToSetCookieError
          # Retry without domain/same_site for host-only or incompatible cookie attributes.
          # NOTE(review): if this retry itself raises UnableToSetCookieError, the
          # sibling rescue clauses below do NOT catch it (Ruby does not re-dispatch
          # exceptions raised inside a rescue body), so it propagates to the caller.
          fallback_cookie = sanitized_cookie.except(:domain, :same_site)
          driver.manage.add_cookie(fallback_cookie)
        rescue Selenium::WebDriver::Error::InvalidCookieDomainError
          next
        rescue Selenium::WebDriver::Error::UnableToSetCookieError
          # NOTE(review): unreachable — the identical clause above always matches first.
          next
        end
      end

      # Validates the restored session by loading the DM inbox (a route that
      # requires login) and checking for redirect/logged-out markers.
      # Raises AuthenticationRequiredError when the session is not live.
      def ensure_authenticated!(driver)
        with_task_capture(driver: driver, task_name: "auth_validate_session") do
          wait_for(driver, css: "body", timeout: 10)

          # Validate against inbox route because "/" can be public and still unauthenticated.
          driver.navigate.to("#{INSTAGRAM_BASE_URL}/direct/inbox/")
          wait_for(driver, css: "body", timeout: 10)

          if driver.current_url.include?("/accounts/login") || logged_out_page?(driver)
            raise AuthenticationRequiredError, "Stored cookies are not authenticated. Re-run Manual Browser Login or import fresh cookies."
          end
        end
      end

    end
  end
end
-
module Instagram
  class Client
    # Sends one message to many usernames: tries the private API first, then
    # falls back to driving the DM UI for any username the API could not
    # reach. All collaborators are injected as callables so the service can
    # be exercised without a live client.
    class BulkMessageSendService
      # Each keyword is a callable (proc/lambda/method object):
      # - with_recoverable_session: wraps the run in session-recovery logic
      # - with_authenticated_driver: yields an authenticated Selenium driver
      # - find_profile_for_interaction: username -> profile record (or nil)
      # - dm_interaction_retry_pending: profile -> true when in cooldown
      # - send_direct_message_via_api: sends via API, returns result Hash
      # - mark_profile_dm_state: persists DM state on the profile
      # - apply_dm_state_from_send_result: persists state from an API failure
      # - disconnected_session_error: error -> true when session is dead
      # - open_dm: (driver, username) -> true when the DM thread opened
      # - send_text_message_from_driver: types/sends text in the open thread
      def initialize(
        with_recoverable_session:,
        with_authenticated_driver:,
        find_profile_for_interaction:,
        dm_interaction_retry_pending:,
        send_direct_message_via_api:,
        mark_profile_dm_state:,
        apply_dm_state_from_send_result:,
        disconnected_session_error:,
        open_dm:,
        send_text_message_from_driver:
      )
        @with_recoverable_session = with_recoverable_session
        @with_authenticated_driver = with_authenticated_driver
        @find_profile_for_interaction = find_profile_for_interaction
        @dm_interaction_retry_pending = dm_interaction_retry_pending
        @send_direct_message_via_api = send_direct_message_via_api
        @mark_profile_dm_state = mark_profile_dm_state
        @apply_dm_state_from_send_result = apply_dm_state_from_send_result
        @disconnected_session_error = disconnected_session_error
        @open_dm = open_dm
        @send_text_message_from_driver = send_text_message_from_driver
      end

      # Sends `message_text` to every username. Returns
      # { attempted:, sent:, failed: }. Usernames that neither sent nor
      # explicitly failed (e.g. their DM thread never opened in the UI
      # fallback) are counted as failed at the end.
      # Raises when the message is blank or the session disconnects.
      def call(usernames:, message_text:)
        raise "Message cannot be blank" if message_text.to_s.strip.blank?

        with_recoverable_session.call(label: "send_messages") do
          sent = 0
          failed = 0
          fallback_usernames = []

          # Phase 1: API sends. Cooldown profiles fail fast; API failures are
          # queued for the UI fallback instead of counting as failed yet.
          usernames.each do |username|
            begin
              profile = find_profile_for_interaction.call(username: username)
              if dm_interaction_retry_pending.call(profile)
                failed += 1
                next
              end

              api_result = send_direct_message_via_api.call(username: username, message_text: message_text)
              if api_result[:sent]
                mark_profile_dm_state.call(
                  profile: profile,
                  state: "messageable",
                  reason: "api_text_sent",
                  retry_after_at: nil
                )
                sent += 1
              else
                apply_dm_state_from_send_result.call(profile: profile, result: api_result)
                fallback_usernames << username
              end
            rescue StandardError => e
              # A dead session aborts the whole run; other errors just demote
              # this username to the UI fallback.
              raise if disconnected_session_error.call(e)

              fallback_usernames << username
            end
          end

          # Phase 2: UI fallback for usernames the API could not reach.
          if fallback_usernames.any?
            with_authenticated_driver.call do |driver|
              fallback_usernames.each do |username|
                begin
                  next unless open_dm.call(driver, username)

                  send_text_message_from_driver.call(driver, message_text)
                  profile = find_profile_for_interaction.call(username: username)
                  mark_profile_dm_state.call(
                    profile: profile,
                    state: "messageable",
                    reason: "ui_fallback_sent",
                    retry_after_at: nil
                  )
                  sent += 1
                  # Light pacing between UI sends.
                  sleep(0.8)
                rescue StandardError => e
                  raise if disconnected_session_error.call(e)

                  failed += 1
                end
              end
            end
          end

          # Anything not accounted for (e.g. open_dm returned false) is a failure.
          unresolved = usernames.length - sent - failed
          failed += unresolved if unresolved.positive?

          {
            attempted: usernames.length,
            sent: sent,
            failed: failed
          }
        end
      end

      private

      attr_reader :with_recoverable_session,
                  :with_authenticated_driver,
                  :find_profile_for_interaction,
                  :dm_interaction_retry_pending,
                  :send_direct_message_via_api,
                  :mark_profile_dm_state,
                  :apply_dm_state_from_send_result,
                  :disconnected_session_error,
                  :open_dm,
                  :send_text_message_from_driver
    end
  end
end
-
module Instagram
-
class Client
-
module CommentPostingService
-
# Posts +comment_text+ on the media identified by +media_id+/+shortcode+.
#
# Strategy: open the post page in the authenticated browser, attempt the
# private web comment API from the page's own context (cookies/CSRF intact),
# and fall back to driving the visible comment UI when the API is rejected.
#
# Returns a Hash describing the outcome ("method" => "api" or "ui_fallback").
# Raises when inputs are blank or when both API and UI paths fail.
def post_comment_to_media!(media_id:, shortcode:, comment_text:)
  text = comment_text.to_s.strip
  raise "Comment cannot be blank" if text.blank?
  raise "Media id is required to post comment" if media_id.to_s.strip.blank?
  raise "Post shortcode is required" if shortcode.to_s.strip.blank?

  with_recoverable_session(label: "post_comment") do
    with_authenticated_driver do |driver|
      # Navigate to the post first so the API call below runs with the
      # page's session cookies and referer.
      with_task_capture(
        driver: driver,
        task_name: "post_comment_open_post",
        meta: { shortcode: shortcode.to_s, media_id: media_id.to_s }
      ) do
        driver.navigate.to("#{INSTAGRAM_BASE_URL}/p/#{shortcode}/")
        wait_for(driver, css: "body", timeout: 12)
        dismiss_common_overlays!(driver)
      end

      payload = post_comment_via_api_from_browser_context(
        driver: driver,
        media_id: media_id.to_s.strip,
        comment_text: text
      )

      parsed = parse_comment_api_payload(payload)
      # Happy path: the web API accepted the comment.
      return parsed[:body].merge("method" => "api", "media_id" => media_id.to_s) if parsed[:ok]

      # IG has started rejecting this endpoint on some sessions/builds with 403.
      # Fallback to visible UI interaction to preserve "Forward Post" behavior.
      capture_task_html(
        driver: driver,
        task_name: "post_comment_api_failed_fallback_ui",
        status: "error",
        meta: {
          shortcode: shortcode.to_s,
          media_id: media_id.to_s,
          api_status: parsed[:status],
          api_error: parsed[:error_message],
          api_response_preview: parsed[:response_preview]
        }
      )

      posted = comment_on_post_via_ui!(driver: driver, shortcode: shortcode.to_s, comment_text: text)
      raise "Instagram comment API returned HTTP #{parsed[:status]}; UI fallback also failed" unless posted

      {
        "status" => "ok",
        "method" => "ui_fallback",
        "api_status" => parsed[:status],
        "api_error" => parsed[:error_message],
        "media_id" => media_id.to_s
      }
    end
  end
end
-
-
# Fires Instagram's private web comment endpoint from inside the logged-in
# page via fetch(), so cookies, CSRF token, and app-id headers come from the
# live session. Runs asynchronously and resolves via the Selenium async
# callback (last JS argument).
#
# Returns whatever the JS callback passes back — on completion a Hash with
# "ok", "status", "content_type", "body" (and "error" on network failure).
def post_comment_via_api_from_browser_context(driver:, media_id:, comment_text:)
  driver.execute_async_script(
    <<~JS,
      const mediaId = arguments[0];
      const comment = arguments[1];
      const done = arguments[arguments.length - 1];

      const body = new URLSearchParams();
      body.set("comment_text", comment);

      const readCookie = (name) => {
        try {
          const cookie = document.cookie || "";
          const parts = cookie.split(";").map((v) => v.trim());
          const hit = parts.find((v) => v.startsWith(name + "="));
          if (!hit) return "";
          return decodeURIComponent(hit.slice(name.length + 1));
        } catch (e) {
          return "";
        }
      };

      const csrf = readCookie("csrftoken");
      const appId =
        document.querySelector("meta[property='al:ios:app_store_id']")?.getAttribute("content") ||
        "936619743392459";
      const rolloutHash =
        window._sharedData?.rollout_hash ||
        window.__initialData?.rollout_hash ||
        "";

      fetch(`/api/v1/web/comments/${mediaId}/add/`, {
        method: "POST",
        credentials: "include",
        headers: {
          "Accept": "application/json, text/plain, */*",
          "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
          "X-Requested-With": "XMLHttpRequest",
          "X-CSRFToken": csrf,
          "X-IG-App-ID": appId,
          "X-Instagram-AJAX": rolloutHash,
          "Referer": window.location.href
        },
        body: body.toString()
      })
      .then(async (resp) => {
        const textBody = await resp.text();
        done({
          ok: resp.ok,
          status: resp.status,
          content_type: resp.headers.get("content-type") || "",
          body: textBody
        });
      })
      .catch((err) => {
        done({
          ok: false,
          status: 0,
          content_type: "",
          body: "",
          error: String(err)
        });
      });
    JS
    media_id.to_s.strip,
    comment_text.to_s
  )
end
-
-
# Normalizes the raw browser-fetch payload from the comment endpoint into a
# uniform result Hash.
#
# payload - expected to be the Hash produced by
#           post_comment_via_api_from_browser_context ("ok", "status",
#           "content_type", "body", optional "error"); any other type is
#           treated as a transport failure.
#
# Returns a Hash with :ok, :status, :response_preview plus either :body
# (parsed JSON, success) or :error_message (failure). Never raises on
# malformed bodies.
def parse_comment_api_payload(payload)
  unless payload.is_a?(Hash)
    return {
      ok: false,
      status: nil,
      error_message: "Unexpected response while posting comment",
      response_preview: payload.to_s.byteslice(0, 500)
    }
  end

  status = payload["status"]
  body_raw = payload["body"].to_s
  ctype = payload["content_type"].to_s
  preview = body_raw.byteslice(0, 900)
  return { ok: false, status: status, error_message: payload["error"].to_s.presence || "Request failed", response_preview: preview } unless payload["ok"] == true

  return { ok: false, status: status, error_message: "Instagram comment API returned non-JSON response", response_preview: preview } unless ctype.include?("json")

  # Only swallow malformed-JSON errors; the previous modifier `rescue`
  # silently hid every StandardError raised on this line.
  body =
    begin
      JSON.parse(body_raw)
    rescue JSON::ParserError
      {}
    end
  # A JSON array (or any non-object) would raise TypeError on body["status"];
  # treat it like an unparseable body instead.
  body = {} unless body.is_a?(Hash)

  body_status = body["status"].to_s
  return { ok: false, status: status, error_message: "Instagram comment API returned status=#{body_status.presence || 'unknown'}", response_preview: preview } unless body_status == "ok"

  { ok: true, status: status, body: body, response_preview: preview }
end
-
end
-
end
-
end
-
module Instagram
-
class Client
-
module CoreHelpers
-
private
-
-
# Converts a unix epoch value (seconds, Integer or String) to a UTC Time.
# Blank input or any conversion failure yields nil.
def parse_unix_time(value)
  if value.blank?
    nil
  else
    Time.at(value.to_i).utc
  end
rescue StandardError
  nil
end
-
-
def cookie_header_for(cookies)
-
Array(cookies).map do |c|
-
name = c["name"].to_s
-
value = c["value"].to_s
-
next if name.blank? || value.blank?
-
"#{name}=#{value}"
-
end.compact.join("; ")
-
end
-
-
# Best-effort check that a Selenium element is visible and not disabled.
# Deliberately optimistic: if Selenium raises while probing (stale node,
# dead session), we assume enabled rather than blocking the flow.
def element_enabled?(el)
  return false unless el
  # `displayed?` can raise on stale elements; treat a probe failure as visible.
  return false unless (el.displayed? rescue true)

  disabled_attr = (el.attribute("disabled") rescue nil).to_s
  aria_disabled = (el.attribute("aria-disabled") rescue nil).to_s

  # Enabled when no `disabled` attribute is present and aria-disabled != "true".
  disabled_attr.blank? && aria_disabled != "true"
rescue StandardError
  true
end
-
-
# Sleeps for a random human-looking interval between the two bounds,
# capped at 2 seconds. A non-positive upper bound skips the pause entirely.
def human_pause(min_seconds = 0.15, max_seconds = 0.55)
  upper = max_seconds.to_f
  return if upper <= 0

  lower = min_seconds.to_f
  duration = lower + (rand * (upper - lower))
  sleep(duration.clamp(0.0, 2.0))
end
-
-
# Debug aid: when INSTAGRAM_FILMSTRIP is set, captures periodic screenshots
# for +seconds+ (every +interval+) into the dated debug-capture directory and
# writes a JSON manifest alongside them. Entirely best-effort; any failure is
# swallowed and the method returns nil.
def maybe_capture_filmstrip(driver, label:, seconds: 5.0, interval: 0.5)
  return unless ENV["INSTAGRAM_FILMSTRIP"].present?

  root = DEBUG_CAPTURE_DIR.join(Time.current.utc.strftime("%Y%m%d"))
  FileUtils.mkdir_p(root)

  started = Time.current.utc
  deadline = started + seconds.to_f
  frames = []
  i = 0

  while Time.current.utc < deadline
    ts = Time.current.utc.strftime("%Y%m%dT%H%M%S.%LZ")
    # Filesystem-safe slug from the label (lowercase, runs of other chars -> "_").
    safe = label.to_s.downcase.gsub(/[^a-z0-9]+/, "_").gsub(/\A_|_\z/, "")
    path = root.join("#{ts}_filmstrip_#{safe}_#{format('%03d', i)}.png")
    begin
      driver.save_screenshot(path.to_s)
      frames << path.to_s
    rescue StandardError
      # best effort
    end
    i += 1
    sleep(interval.to_f)
  end

  # Manifest so a human can correlate frames with the originating task.
  meta = {
    timestamp: Time.current.utc.iso8601(3),
    label: label,
    seconds: seconds,
    interval: interval,
    frames: frames
  }
  File.write(root.join("#{started.strftime('%Y%m%dT%H%M%S.%LZ')}_filmstrip_#{label}.json"), JSON.pretty_generate(meta))
rescue StandardError
  nil
end
-
-
# Waits up to +timeout+ seconds for a *visible* element matching the given
# css: or xpath: locator and returns it. Stale elements encountered while
# probing are skipped. Raises Selenium's timeout error if nothing becomes
# visible in time.
def wait_for(driver, css: nil, xpath: nil, timeout: 10)
  waiter = Selenium::WebDriver::Wait.new(timeout: timeout)
  waiter.until do
    locator =
      if css
        { css: css }
      elsif xpath
        { xpath: xpath }
      end
    next nil unless locator

    visible = nil
    driver.find_elements(**locator).each do |el|
      shown =
        begin
          el.displayed?
        rescue Selenium::WebDriver::Error::StaleElementReferenceError
          false
        end
      if shown
        visible = el
        break
      end
    end
    visible
  end
end
-
-
# Waits up to +timeout+ seconds for at least one element matching the css:
# or xpath: locator to exist in the DOM (visibility is not required).
# Raises Selenium's timeout error when none appears.
def wait_for_present(driver, css: nil, xpath: nil, timeout: 10)
  Selenium::WebDriver::Wait.new(timeout: timeout).until do
    locator = css ? { css: css } : (xpath ? { xpath: xpath } : nil)
    locator ? driver.find_elements(**locator).any? : nil
  end
end
-
-
# Builds the operator-facing error message for a DM websocket TLS failure,
# embedding the detected reason from the verification result.
def websocket_tls_guidance(verify)
  tls_details = verify[:tls_issue].to_h
  reason = tls_details[:reason].presence || "certificate validation error"
  [
    "Instagram DM transport failed: #{reason}. ",
    "Chrome could not establish a trusted secure connection to Instagram chat endpoints. ",
    "Install/trust the system CA used by your network proxy or, for local debugging only, ",
    "set INSTAGRAM_CHROME_IGNORE_CERT_ERRORS=true and retry."
  ].join
end
-
-
# Scans the Chrome browser log for TLS certificate failures on Instagram's
# DM websocket. Returns { found: true, reason:, message: } when a matching
# log line exists, otherwise { found: false } (with :error on probe failure).
def detect_websocket_tls_issue(driver)
  # Not every driver exposes browser logs (e.g. non-Chrome); bail quietly.
  return { found: false } unless driver.respond_to?(:logs)

  entries = driver.logs.get(:browser) rescue []
  messages = Array(entries).map { |e| e.message.to_s }

  # Common failure observed in this environment: the IG Direct gateway websocket fails TLS validation,
  # which can prevent DMs from actually being delivered even though the UI clears the composer.
  bad = messages.find { |m| m.include?("gateway.instagram.com/ws/streamcontroller") && m.include?("ERR_CERT_AUTHORITY_INVALID") }
  return { found: true, reason: "ERR_CERT_AUTHORITY_INVALID", message: bad.to_s.byteslice(0, 2000) } if bad

  # Weaker signal: any cert-authority failure in the log, not just the DM gateway.
  other = messages.find { |m| m.include?("ERR_CERT_AUTHORITY_INVALID") }
  return { found: true, reason: "ERR_CERT_AUTHORITY_INVALID", message: other.to_s.byteslice(0, 2000) } if other

  { found: false }
rescue StandardError => e
  { found: false, error: "#{e.class}: #{e.message}" }
end
-
-
# Canonicalizes an Instagram handle: strip, lowercase, and drop every
# character outside a-z, 0-9, "." and "_".
def normalize_username(value)
  value.to_s.strip.downcase.delete("^a-z0-9._")
end
-
-
# Parses a strictly numeric string (digits only after stripping) into an
# Integer; anything else — including "1,234" or "" — yields nil.
def normalize_count(value)
  digits = value.to_s.strip
  digits.match?(/\A\d+\z/) ? digits.to_i : nil
rescue StandardError
  nil
end
-
-
# Pulls follower/following counts from a profile page's meta description.
# Best-effort; depends on English locale. Example:
# "246 Followers, 661 Following, 37 Posts - See Instagram photos..."
# Returns { followers:, following: } or nil when the pattern is absent.
def extract_profile_follow_counts(html)
  m = html.to_s.match(/content=\"\s*([\d,]+)\s*Followers,\s*([\d,]+)\s*Following\b/i)
  m && {
    followers: m[1].to_s.delete(",").to_i,
    following: m[2].to_s.delete(",").to_i
  }
rescue StandardError
  nil
end
-
-
# Extracts usernames from the story tray embedded in the home-feed HTML.
# Tries, in order: (1) locating a known preloader/tray marker and scanning a
# window after it, (2) whole-document regex heuristics when no marker is
# found. Returns an array of normalized usernames (deduped; the no-marker
# path is capped at 12); [] on blank input or any error.
def extract_story_users_from_home_html(html)
  return [] if html.blank?

  # Try multiple preloader patterns with more aggressive matching
  patterns = [
    "adp_PolarisStoriesV3TrayContainerQueryRelayPreloader_",
    "adp_PolarisStoriesV",
    "StoriesTrayContainer",
    "stories_tray",
    "story-tray",
    "StoryTray",
    "storyTray",
    "stories-container",
    "storiesContainer"
  ]

  idx = nil
  window = ""

  # Scan from the first marker hit; 800KB window bounds the regex work below.
  patterns.each do |pattern|
    idx = html.index(pattern)
    if idx
      window = html.byteslice(idx, 800_000) || ""
      break
    end
  end

  # If no preloader found, try direct username extraction from the entire HTML
  if idx.nil?
    # Look for any story-related patterns in the HTML
    story_patterns = [
      /\"username\":\"([A-Za-z0-9._]{1,30})\"[\s\S]{0,1000}\"has_story\":true/,
      /\"user\":\{[\s\S]{0,2000}\"username\":\"([A-Za-z0-9._]{1,30})\"[\s\S]{0,2000}\"has_?story\":\s*true/,
      /\"([A-Za-z0-9._]{1,30})\"[\s\S]{0,500}\"story\"/,
      /\/stories\/([A-Za-z0-9._]{1,30})\//
    ]

    usernames = []
    story_patterns.each do |pattern|
      matches = html.scan(pattern)
      if matches.is_a?(Array)
        matches = matches.flatten if matches.first.is_a?(Array)
        usernames.concat(matches)
      end
    end

    # Heuristic matches are noisy, so cap at 12.
    return usernames.map { |u| normalize_username(u) }.reject(&:blank?).uniq.take(12)
  end

  # Prefer story-tray item extraction
  tray_usernames = window.scan(/\"user\":\{[\s\S]{0,4000}?\"username\":\"([A-Za-z0-9._]{1,30})\"[\s\S]{0,4000}?\"uuid\":\"/).flatten
  tray_usernames = tray_usernames.map { |u| normalize_username(u) }.reject(&:blank?).uniq
  return tray_usernames unless tray_usernames.empty?

  # Fallback: grab usernames in this payload window
  usernames = window.scan(/\"username\":\"([A-Za-z0-9._]{1,30})\"/).flatten.map { |u| normalize_username(u) }
  usernames.reject(&:blank?).uniq
rescue StandardError => e
  Rails.logger.error "Story extraction error: #{e.message}" if defined?(Rails)
  []
end
-
end
-
end
-
end
-
module Instagram
-
class Client
-
module DirectMessagingService
-
# Sends +message_text+ to every handle in +usernames+ by delegating to
# BulkMessageSendService, wiring the client's session/driver helpers and DM
# state callbacks in as Method objects. Returns the service's result
# (per the service: counts of attempted/sent/failed).
def send_messages!(usernames:, message_text:)
  BulkMessageSendService.new(
    with_recoverable_session: method(:with_recoverable_session),
    with_authenticated_driver: method(:with_authenticated_driver),
    find_profile_for_interaction: method(:find_profile_for_interaction),
    dm_interaction_retry_pending: method(:dm_interaction_retry_pending?),
    send_direct_message_via_api: method(:send_direct_message_via_api!),
    mark_profile_dm_state: method(:mark_profile_dm_state!),
    apply_dm_state_from_send_result: method(:apply_dm_state_from_send_result),
    disconnected_session_error: method(:disconnected_session_error?),
    open_dm: method(:open_dm),
    send_text_message_from_driver: method(:send_text_message_from_driver!)
  ).call(usernames: usernames, message_text: message_text)
end
-
-
# Sends a single DM to +username+ via SingleMessageSendService, injecting the
# same client helpers as send_messages! (plus task capture). Returns the
# service's result.
def send_message_to_user!(username:, message_text:)
  SingleMessageSendService.new(
    with_recoverable_session: method(:with_recoverable_session),
    with_authenticated_driver: method(:with_authenticated_driver),
    with_task_capture: method(:with_task_capture),
    find_profile_for_interaction: method(:find_profile_for_interaction),
    dm_interaction_retry_pending: method(:dm_interaction_retry_pending?),
    send_direct_message_via_api: method(:send_direct_message_via_api!),
    mark_profile_dm_state: method(:mark_profile_dm_state!),
    apply_dm_state_from_send_result: method(:apply_dm_state_from_send_result),
    open_dm: method(:open_dm),
    send_text_message_from_driver: method(:send_text_message_from_driver!)
  ).call(username: username, message_text: message_text)
end
-
-
# Sends a text DM through Instagram's private broadcast API (no UI).
# Resolves username -> user id -> thread id, then POSTs the text.
#
# Returns a Hash that always has :sent (Boolean), :method ("api") and
# :reason; on success it adds api_status/api_thread_id/api_item_id, on
# API-level failure the server's error details. Never raises — exceptions
# collapse into { sent: false, reason: "api_exception:..." }.
def send_direct_message_via_api!(username:, message_text:)
  text = message_text.to_s.strip
  return { sent: false, method: "api", reason: "blank_message_text" } if text.blank?

  uname = normalize_username(username)
  return { sent: false, method: "api", reason: "blank_username" } if uname.blank?

  user_id = story_user_id_for(username: uname)
  return { sent: false, method: "api", reason: "missing_user_id" } if user_id.blank?

  thread_id = direct_thread_id_for_user(user_id: user_id)
  return { sent: false, method: "api", reason: "missing_thread_id" } if thread_id.blank?

  body = ig_api_post_form_json(
    path: "/api/v1/direct_v2/threads/broadcast/text/",
    referer: "#{INSTAGRAM_BASE_URL}/direct/t/#{thread_id}/",
    form: {
      action: "send_item",
      client_context: story_api_client_context,
      thread_id: thread_id,
      text: text
    }
  )
  return { sent: false, method: "api", reason: "empty_api_response" } unless body.is_a?(Hash)

  status = body["status"].to_s
  if status == "ok"
    return {
      sent: true,
      method: "api",
      reason: "text_sent",
      api_status: status,
      api_thread_id: body.dig("payload", "thread_id").to_s.presence || thread_id,
      api_item_id: body.dig("payload", "item_id").to_s.presence
    }
  end

  # Non-ok status: surface the most specific error text the API gave us.
  {
    sent: false,
    method: "api",
    reason: body["message"].to_s.presence || body.dig("payload", "message").to_s.presence || body["error_type"].to_s.presence || "api_status_#{status.presence || 'unknown'}",
    api_status: status.presence || "unknown",
    api_http_status: body["_http_status"],
    api_error_code: body.dig("payload", "error_code").to_s.presence || body["error_code"].to_s.presence
  }
rescue StandardError => e
  { sent: false, method: "api", reason: "api_exception:#{e.class.name}" }
end
-
-
# Determines whether +username+ can receive DMs. Tries the API probe first;
# when it is inconclusive (Hash with can_message: nil), falls back to driving
# the browser UI. Returns the probe's result Hash.
def verify_messageability!(username:)
  with_recoverable_session(label: "verify_messageability") do
    result = verify_messageability_from_api(username: username)
    # A non-Hash or a decided can_message (true/false) is final; only
    # can_message: nil means "unknown, try the UI".
    return result if !result.is_a?(Hash) || !result[:can_message].nil?

    with_authenticated_driver do |driver|
      verify_messageability_from_driver(driver, username: username)
    end
  end
end
-
-
# API-side messageability probe: tries to resolve the user id and create a
# direct thread. Returns a Hash with :can_message (true/false, or nil when
# undecided), :restriction_reason, :source ("api"), plus dm_state/dm_reason/
# dm_retry_after_at bookkeeping for the profile record. Never raises.
def verify_messageability_from_api(username:)
  uname = normalize_username(username)
  return { can_message: nil, restriction_reason: "Username is blank", source: "api" } if uname.blank?

  user_id = story_user_id_for(username: uname)
  if user_id.blank?
    # Can't even resolve the account via API; back off for a few hours.
    return {
      can_message: false,
      restriction_reason: "Unable to resolve user id via API",
      source: "api",
      dm_state: "unknown",
      dm_reason: "missing_user_id",
      dm_retry_after_at: Time.current + 6.hours
    }
  end

  # use_cache: false so a previously-failed thread lookup is retried fresh.
  thread_result = create_direct_thread_for_user(user_id: user_id, use_cache: false)
  thread_id = thread_result[:thread_id].to_s
  return { can_message: true, restriction_reason: nil, source: "api", dm_state: "messageable", dm_reason: "thread_created", dm_retry_after_at: nil } if thread_id.present?

  reason = thread_result[:reason].to_s.presence || "missing_thread_id"
  # 403 is treated as a hard restriction -> long backoff; otherwise retry sooner.
  retry_after =
    if thread_result[:api_http_status].to_i == 403
      Time.current + STORY_INTERACTION_RETRY_DAYS.days
    else
      Time.current + 12.hours
    end

  {
    can_message: false,
    restriction_reason: "DM unavailable via API (#{reason})",
    source: "api",
    dm_state: "unavailable",
    dm_reason: reason,
    dm_retry_after_at: retry_after,
    api_status: thread_result[:api_status],
    api_http_status: thread_result[:api_http_status],
    api_error_code: thread_result[:api_error_code]
  }
rescue StandardError => e
  # Undecided (can_message: nil) so the caller can fall back to the UI probe.
  {
    can_message: nil,
    restriction_reason: "Unable to verify messaging availability (api exception)",
    source: "api",
    dm_state: "unknown",
    dm_reason: "exception:#{e.class.name}",
    dm_retry_after_at: Time.current + 6.hours
  }
end
-
-
# UI-side messageability probe: opens the DM thread in the browser and checks
# whether the composer textbox appears. Returns the same result-Hash shape as
# verify_messageability_from_api (with source: "ui"). Raises on blank username.
def verify_messageability_from_driver(driver, username:)
  username = normalize_username(username)
  raise "Username cannot be blank" if username.blank?

  with_task_capture(driver: driver, task_name: "profile_verify_messageability", meta: { username: username }) do
    ok = open_dm(driver, username)
    if !ok
      # Could not reach a thread at all (profile CTA and direct/new both failed).
      {
        can_message: false,
        restriction_reason: "Unable to open DM thread",
        source: "ui",
        dm_state: "unavailable",
        dm_reason: "unable_to_open_dm_thread",
        dm_retry_after_at: Time.current + 12.hours
      }
    else
      begin
        # Thread opened; messageable only if the composer actually renders.
        wait_for_present(driver, css: dm_textbox_css, timeout: 10)
        {
          can_message: true,
          restriction_reason: nil,
          source: "ui",
          dm_state: "messageable",
          dm_reason: "composer_visible",
          dm_retry_after_at: nil
        }
      rescue Selenium::WebDriver::Error::TimeoutError
        # Thread exists but the composer never appeared (e.g. restricted).
        {
          can_message: false,
          restriction_reason: "Unable to open message box",
          source: "ui",
          dm_state: "unavailable",
          dm_reason: "message_box_unavailable",
          dm_retry_after_at: Time.current + 12.hours
        }
      end
    end
  end
end
-
-
# Opens a DM by visiting the user's profile and clicking its "Message" CTA.
# Returns true when a click was performed, false when no CTA was found or the
# click failed. Does NOT itself verify that a composer appeared — callers
# (see open_dm) do that.
def open_dm_from_profile(driver, username)
  driver.navigate.to("#{INSTAGRAM_BASE_URL}/#{username}/")
  wait_for(driver, css: "body", timeout: 10)
  dismiss_common_overlays!(driver)
  human_pause

  # Case-insensitive contains("message") across common clickable elements.
  ci = "translate(normalize-space(.), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz')"
  message_xpath = "//*[self::button or (self::div and @role='button') or self::a][contains(#{ci}, 'message')]"
  aria_xpath = "//*[@aria-label and contains(translate(@aria-label,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'message')]"

  # Wait for the CTA to be visible. Profile pages often render in stages; grabbing `.first` can pick a hidden node.
  message_button =
    begin
      wait_for(driver, xpath: message_xpath, timeout: 10)
    rescue Selenium::WebDriver::Error::TimeoutError
      nil
    end
  # Fallback: aria-labelled controls (icon-only message buttons).
  message_button ||= driver.find_elements(xpath: aria_xpath).find { |el| el.displayed? rescue false }

  return false unless message_button

  # Native click first; JS click fallback (js_click returns its own truthiness).
  click_ok =
    begin
      driver.action.move_to(message_button).click.perform
      true
    rescue StandardError
      js_click(driver, message_button)
    end

  return false unless click_ok
  maybe_capture_filmstrip(driver, label: "dm_open_profile_after_click")

  true
end
-
-
# Opens a DM thread with +username+, trying the profile-page CTA first and
# the /direct/new/ flow second. Returns true once a composer or thread URL is
# confirmed; false when every strategy fails.
def open_dm(driver, username)
  username = normalize_username(username)
  return false if username.blank?

  # Strategy 1: profile page CTA
  ok = with_task_capture(driver: driver, task_name: "dm_open_profile", meta: { username: username }) do
    open_dm_from_profile(driver, username)
  end
  if ok
    begin
      # The CTA click alone isn't proof; require the composer or thread URL.
      wait_for_dm_composer_or_thread!(driver, timeout: 12)
      return true
    rescue Selenium::WebDriver::Error::TimeoutError
      # fall through to next strategy
    end
  end

  # Strategy 2: direct/new flow (SPA-safe)
  ok2 = with_task_capture(driver: driver, task_name: "dm_open_direct_new", meta: { username: username }) do
    open_dm_via_direct_new(driver, username)
  end
  return true if ok2

  # On some IG builds the URL flips to the thread before the composer becomes queryable.
  driver.current_url.to_s.include?("/direct/t/")
end
-
-
# Opens a DM through the /direct/new/ recipient picker: type the username in
# the search box, click the matching result row, then click the "Next"/"Chat"
# CTA (or detect that IG jumped straight into the thread). Returns true when
# the composer/thread is confirmed, false on any step that can't proceed.
def open_dm_via_direct_new(driver, username)
  driver.navigate.to("#{INSTAGRAM_BASE_URL}/direct/new/")
  wait_for(driver, css: "body", timeout: 12)
  dismiss_common_overlays!(driver)
  human_pause

  # Find a search box for recipients.
  selectors = [
    "input[name='queryBox']",
    "input[placeholder*='Search']",
    "input[aria-label*='Search']",
    "input[type='text']"
  ]

  typed = false
  3.times do |attempt|
    # Prefer a displayed input; fall back to any match for the selector.
    input =
      selectors.lazy.map { |sel| driver.find_elements(css: sel).find(&:displayed?) }.find(&:present?) ||
      selectors.lazy.map { |sel| driver.find_elements(css: sel).first }.find(&:present?)

    break unless input

    begin
      input.click
      # Clear any existing value.
      input.send_keys([:control, "a"])
      input.send_keys(:backspace)
      input.send_keys(username)
      typed = true
      human_pause
      break
    rescue Selenium::WebDriver::Error::StaleElementReferenceError, Selenium::WebDriver::Error::ElementNotInteractableError
      Rails.logger.info("open_dm_via_direct_new retry typing (attempt #{attempt + 1}/3)")
      sleep(0.5)
      next
    end
  end

  return false unless typed
  capture_task_html(driver: driver, task_name: "dm_open_direct_new_after_type", status: "ok", meta: { username: username })

  # Wait for the username to appear in results and click it.
  username_down = username.to_s.downcase
  ci = "translate(normalize-space(.), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz')"
  row_xpath = "//div[@role='button'][.//*[contains(#{ci}, '#{username_down}')]]"
  row_with_img_xpath = "//div[@role='button'][.//*[contains(#{ci}, '#{username_down}')]]//img/ancestor::div[@role='button'][1]"

  begin
    Selenium::WebDriver::Wait.new(timeout: 12).until do
      driver.find_elements(xpath: row_with_img_xpath).any? ||
        driver.find_elements(xpath: row_xpath).any? ||
        driver.find_elements(xpath: "//*[contains(#{ci}, '#{username_down}')]").any?
    end
  rescue Selenium::WebDriver::Error::TimeoutError
    return false
  end

  # Pick the most specific visible match first (row with avatar image),
  # degrading to any node containing the username text.
  candidate =
    driver.find_elements(xpath: row_with_img_xpath).find { |el| el.displayed? rescue false } ||
    driver.find_elements(xpath: row_xpath).find { |el| el.displayed? rescue false } ||
    driver.find_elements(xpath: row_xpath).first ||
    driver.find_elements(xpath: "//*[contains(#{ci}, '#{username_down}')]").find { |el| el.displayed? rescue false } ||
    driver.find_elements(xpath: "//*[contains(#{ci}, '#{username_down}')]").first
  return false unless candidate

  # Click nearest clickable container; otherwise click the text node parent.
  clickable =
    begin
      driver.execute_script(<<~JS, candidate)
        const el = arguments[0];
        // For direct/new, the row itself is usually role=button.
        if (el && el.getAttribute && el.getAttribute("role") === "button") return el;
        const btn = el.closest("button,[role='button']");
        return btn || el;
      JS
    rescue StandardError
      candidate
    end

  begin
    driver.action.move_to(clickable).click.perform
  rescue StandardError
    js_click(driver, clickable)
  end
  human_pause
  capture_task_html(driver: driver, task_name: "dm_open_direct_new_after_pick", status: "ok", meta: { username: username })

  # Click the continuation CTA to open chat ("Next" on some builds, "Chat" on others).
  continue_btn = nil
  begin
    Selenium::WebDriver::Wait.new(timeout: 12).until do
      continue_btn =
        driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][normalize-space()='Next']").find(&:displayed?) ||
        driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][normalize-space()='Chat']").find(&:displayed?) ||
        driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][contains(translate(normalize-space(.),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'next')]").find(&:displayed?) ||
        driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][contains(translate(normalize-space(.),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'chat')]").find(&:displayed?)
      continue_btn.present? && element_enabled?(continue_btn)
    end
  rescue Selenium::WebDriver::Error::TimeoutError
    continue_btn = nil
  end

  # Some UI variants jump directly into the thread immediately after recipient selection.
  return true if driver.current_url.to_s.include?("/direct/t/")
  return false unless continue_btn

  begin
    driver.action.move_to(continue_btn).click.perform
  rescue StandardError
    js_click(driver, continue_btn)
  end
  maybe_capture_filmstrip(driver, label: "dm_open_direct_new_after_next")
  capture_task_html(driver: driver, task_name: "dm_open_direct_new_after_next", status: "ok", meta: { username: username })

  wait_for_dm_composer_or_thread!(driver, timeout: 16)
  true
rescue Selenium::WebDriver::Error::TimeoutError
  false
end
-
-
# Blocks until the browser is actually inside a DM conversation: either the
# URL is a /direct/t/ thread or a role=textbox composer exists. Raises
# Selenium's timeout error otherwise.
def wait_for_dm_composer_or_thread!(driver, timeout:)
  Selenium::WebDriver::Wait.new(timeout: timeout).until do
    url = driver.current_url.to_s
    # Some failures bounce back to inbox; treat as not-opened.
    next false if url.include?("/direct/inbox")

    url.include?("/direct/t/") || driver.find_elements(css: "div[role='textbox']").any?
  end
end
-
-
# CSS selector for the DM composer. It is a contenteditable div (Lexical
# editor); some builds render multiple role=textbox nodes (hidden + visible),
# so the contenteditable variant is listed first to be preferred.
def dm_textbox_css
  [
    "div[role='textbox'][contenteditable='true']",
    "div[role='textbox']"
  ].join(", ")
end
-
-
# Types +message_text+ into the open DM composer and submits it, then
# verifies delivery. Send order matters: click the explicit "Send" button
# first, Enter only as fallback (recent IG builds can clear the composer on
# Enter without delivering — a silent failure). Retries the whole
# focus/type/send sequence up to 3 times on transient Selenium errors.
#
# Returns true when verify_dm_send confirms delivery; raises with a
# diagnostic message (including TLS guidance where applicable) otherwise.
def send_text_message_from_driver!(driver, message_text, expected_username: nil)
  raise "Message cannot be blank" if message_text.to_s.strip.blank?

  css = dm_textbox_css
  wait_for_present(driver, css: css, timeout: 12)

  box = find_visible_dm_textbox(driver)
  raise Selenium::WebDriver::Error::NoSuchElementError, "No DM textbox found" unless box

  3.times do |attempt|
    # Bring the composer into view and focus it (all best effort).
    begin
      driver.execute_script("arguments[0].scrollIntoView({block: 'center', inline: 'nearest'});", box)
      driver.execute_script("arguments[0].focus();", box)
      driver.execute_script("arguments[0].click();", box)
    rescue StandardError
      # best effort
    end

    begin
      box.click
    rescue Selenium::WebDriver::Error::ElementClickInterceptedError, Selenium::WebDriver::Error::ElementNotInteractableError
      # ignore; we'll try typing via actions as a fallback
    end

    begin
      # Clear any residual draft text (best effort).
      begin
        driver.action.click(box).key_down(:control).send_keys("a").key_up(:control).send_keys(:backspace).perform
      rescue StandardError
        nil
      end

      # Type using actions (more reliable on IG's Lexical composer than direct send_keys on the element).
      driver.action.click(box).send_keys(message_text.to_s).perform

      typed = read_dm_textbox_text(driver)
      capture_task_html(
        driver: driver,
        task_name: "dm_send_text_after_type",
        status: "ok",
        meta: { expected_username: expected_username, message_preview: message_text.to_s.strip.byteslice(0, 80), textbox_text_preview: typed.to_s.byteslice(0, 120) }
      )

      # Prefer clicking "Send" first. Recent IG builds sometimes clear the composer on Enter even when
      # the message never actually sends (silent failure), so Enter-first can give a false sense of success.
      clicked_send = click_dm_send_button(driver, textbox: box)
      capture_task_html(
        driver: driver,
        task_name: "dm_send_text_after_send_click",
        status: "ok",
        meta: { expected_username: expected_username, message_preview: message_text.to_s.strip.byteslice(0, 80), clicked_send: clicked_send }
      )

      # If we could not click the Send button, attempt Enter as a fallback.
      enter_attempted = false
      if !(clicked_send.is_a?(Hash) && clicked_send[:clicked])
        begin
          box.send_keys(:enter)
          enter_attempted = true
        rescue StandardError
          enter_attempted = false
        end
      end

      after_enter_text = read_dm_textbox_text(driver)
      capture_task_html(
        driver: driver,
        task_name: "dm_send_text_after_enter",
        status: "ok",
        meta: {
          expected_username: expected_username,
          message_preview: message_text.to_s.strip.byteslice(0, 80),
          enter_attempted: enter_attempted,
          textbox_text_preview: after_enter_text.to_s.byteslice(0, 120),
          send_button_clicked: (clicked_send.is_a?(Hash) ? clicked_send[:clicked] : nil),
          send_button_reason: (clicked_send.is_a?(Hash) ? clicked_send[:reason] : nil)
        }
      )

      sent = (clicked_send.is_a?(Hash) ? clicked_send[:clicked] : !!clicked_send) || enter_attempted

      unless sent
        # Last resort.
        driver.action.send_keys(:enter).perform
      end
      break
    rescue Selenium::WebDriver::Error::StaleElementReferenceError
      # Composer node was re-rendered; re-resolve and retry the sequence.
      sleep(0.4)
      box = find_visible_dm_textbox(driver)
      next
    rescue Selenium::WebDriver::Error::ElementNotInteractableError, Selenium::WebDriver::Error::InvalidElementStateError
      # Fallback: send keys to the active element (Instagram's Lexical editor usually focuses it).
      driver.action.send_keys(message_text.to_s).perform
      tb = find_visible_dm_textbox(driver)
      click_dm_send_button(driver, textbox: tb).to_h[:clicked] || driver.action.send_keys(:enter).perform
      break
    rescue StandardError
      raise if attempt >= 2
      sleep(0.6)
      next
    end
  end

  verify = verify_dm_send(driver, message_text.to_s, expected_username: expected_username)
  return true if verify[:ok]
  if verify[:reason].to_s.start_with?("websocket_tls_error")
    raise websocket_tls_guidance(verify)
  end

  # Force a debug capture even though the caller will also capture on error.
  capture_task_html(driver: driver, task_name: "dm_send_text_verify", status: "error", meta: verify)
  raise "Message not confirmed as sent (#{verify[:reason]})"
end
-
-
# Locates the DM composer element: prefer contenteditable role=textbox nodes,
# fall back to any role=textbox. Returns the first displayed candidate
# (stale nodes are skipped), or the first candidate at all, or nil.
def find_visible_dm_textbox(driver)
  candidates = driver.find_elements(css: "div[role='textbox'][contenteditable='true']")
  candidates = driver.find_elements(css: "div[role='textbox']") if candidates.empty?

  displayed = candidates.find do |el|
    el.displayed?
  rescue Selenium::WebDriver::Error::StaleElementReferenceError
    false
  end
  displayed || candidates.first
end
-
-
# Reads the composer's current text via JS (innerText). Returns nil when no
# composer exists or when script execution fails.
def read_dm_textbox_text(driver)
  driver.execute_script(<<~JS)
    const textbox =
      document.querySelector("div[role='textbox'][contenteditable='true']") ||
      document.querySelector("div[role='textbox']");
    if (!textbox) return null;
    return (textbox.innerText || "").toString();
  JS
rescue StandardError
  nil
end
-
-
# Verifies (best effort) that a DM containing +message_text+ was actually sent
# in the currently open Instagram thread.
#
# Polls the DOM up to 40 times (~0.75s apart). Success requires BOTH that the
# composer textbox is empty and that an outgoing bubble containing the
# normalized message text is mounted; when +expected_username+ is given and the
# page exposes profile links, the thread must also match that username.
# Visible send-error text, the "message request" interstitial, or a WebSocket
# TLS failure short-circuits with ok: false.
#
# @param driver [Selenium::WebDriver::Driver] authenticated browser session
# @param message_text [String] the text we attempted to send
# @param expected_username [String, nil] optional thread owner to validate against
# @return [Hash] { ok:, reason:, details?, ... } — never raises; exceptions are
#   converted into { ok: false, reason: "verify_exception ..." }
def verify_dm_send(driver, message_text, expected_username: nil)
  needle = message_text.to_s.strip
  return { ok: false, reason: "blank message_text" } if needle.blank?

  # Poll briefly because the UI can take a moment to append the outgoing bubble.
  last = nil
  40.times do |i|
    # Try to keep the message list near the bottom so the newest outgoing bubble is mounted.
    begin
      driver.execute_script(<<~JS)
        const main =
          document.querySelector("div[role='main']") ||
          document.scrollingElement ||
          document.documentElement ||
          document.body;
        try { main.scrollTop = 1e9; } catch (e) {}
        try { window.scrollTo(0, document.body.scrollHeight); } catch (e) {}
      JS
    rescue StandardError
      nil
    end

    # One DOM probe that gathers every verification signal at once; the result
    # comes back as a Hash with JS-named (camelCase) string keys.
    last = driver.execute_script(<<~JS, needle, expected_username.to_s)
      const needle = (arguments[0] || "").replace(/\\s+/g, " ").trim();
      const expected = (arguments[1] || "").toLowerCase().trim();

      const norm = (s) => (s || "").replace(/\\s+/g, " ").trim();

      const textbox =
        document.querySelector("div[role='textbox'][contenteditable='true']") ||
        document.querySelector("div[role='textbox']");

      const textboxText = textbox ? norm(textbox.innerText) : null;
      const textboxEmpty = !textboxText || textboxText.length === 0;

      // Common send failure surface text (best effort).
      const bodyText = norm(document.body && document.body.innerText);
      const sendError =
        bodyText.includes("couldn't send") ||
        bodyText.includes("could not send") ||
        bodyText.includes("try again") && bodyText.includes("message");

      const messageRequestInterstitial =
        bodyText.includes("message request") ||
        bodyText.includes("message requests") ||
        (bodyText.includes("allow") && bodyText.includes("decline") && bodyText.includes("message"));

      const alertTexts = Array.from(document.querySelectorAll("[role='alert'],[aria-live='polite'],[aria-live='assertive']"))
        .map((n) => norm(n && (n.innerText || n.textContent)))
        .filter((t) => t && t.length > 0)
        .slice(0, 10);

      // Best-effort: try to validate we are in the intended thread.
      let threadMatches = null;
      if (expected) {
        const hrefs = Array.from(document.querySelectorAll("a[href^='/']"))
          .map((a) => (a.getAttribute("href") || "").toLowerCase());
        threadMatches = hrefs.some((h) => h === `/${expected}/` || h.startsWith(`/${expected}/`));
      }

      const nodes = Array.from(document.querySelectorAll(
        "div[role='row'], div[role='listitem'], [dir='auto'], span[data-lexical-text='true']"
      ));
      let bubbleFound = false;
      for (let i = nodes.length - 1; i >= 0 && i >= nodes.length - 400; i--) {
        const n = nodes[i];
        if (!n) continue;
        if (textbox && (textbox === n || textbox.contains(n) || n.contains(textbox))) continue;
        const t = norm(n.textContent || n.innerText);
        const a = norm(n.getAttribute && n.getAttribute("aria-label"));
        const combined = (t + " " + a).trim();
        if (combined && combined.includes(needle)) { bubbleFound = true; break; }
      }

      return { textboxEmpty, textboxText, bubbleFound, threadMatches, sendError, messageRequestInterstitial, alertTexts };
    JS

    # Hard failures surfaced by the page take precedence over success checks.
    if last.is_a?(Hash) && last["sendError"] == true
      return { ok: false, reason: "send_error_visible", details: last }
    end

    if last.is_a?(Hash) && last["messageRequestInterstitial"] == true
      return { ok: false, reason: "message_request_interstitial_visible", details: last }
    end

    if last.is_a?(Hash) && last["textboxEmpty"] == true && last["bubbleFound"] == true
      # If we can determine threadMatches, require it; otherwise accept.
      if expected_username.to_s.strip.present?
        tm = last["threadMatches"]
        if tm.nil? || tm == true
          return { ok: true, reason: "verified", details: last }
        end
      else
        return { ok: true, reason: "verified", details: last }
      end
    end

    sleep(0.75)

    # Fail fast if DM transport is broken at the browser/network layer.
    # Checked every 4th iteration to keep the poll loop cheap.
    if (i % 4).zero?
      tls = detect_websocket_tls_issue(driver)
      if tls[:found]
        return {
          ok: false,
          reason: "websocket_tls_error #{tls[:reason]}",
          tls_issue: tls,
          details: last,
          expected_username: expected_username,
          message_preview: needle.byteslice(0, 80)
        }
      end
    end

    # One refresh mid-way can help when the UI doesn't mount the most recent bubble immediately.
    if i == 10
      begin
        driver.navigate.refresh
        wait_for(driver, css: "body", timeout: 10)
      rescue StandardError
        nil
      end
    end
  end

  # Poll exhausted: one final transport check before reporting the ambiguous state.
  tls = detect_websocket_tls_issue(driver)
  if tls[:found]
    return {
      ok: false,
      reason: "websocket_tls_error #{tls[:reason]}",
      tls_issue: tls,
      details: last,
      expected_username: expected_username,
      message_preview: needle.byteslice(0, 80)
    }
  end

  # If we couldn't find the bubble, but the textbox is empty, treat as "unknown" rather than success.
  textbox_empty = last.is_a?(Hash) ? last["textboxEmpty"] : nil
  bubble = last.is_a?(Hash) ? last["bubbleFound"] : nil
  thread = last.is_a?(Hash) ? last["threadMatches"] : nil

  {
    ok: false,
    reason: "textbox_empty=#{textbox_empty.inspect} bubble_found=#{bubble.inspect} thread_matches=#{thread.inspect} message_request_interstitial=#{last.is_a?(Hash) ? last['messageRequestInterstitial'].inspect : 'nil'}",
    details: last,
    expected_username: expected_username,
    message_preview: needle.byteslice(0, 80)
  }
rescue StandardError => e
  { ok: false, reason: "verify_exception #{e.class}: #{e.message}" }
end
-
-
# Locates the DM "Send" button near +textbox+, marks it with a data attribute
# in the DOM, and clicks it via WebDriver actions (falling back to a JS click).
#
# @param driver  [Selenium::WebDriver::Driver]
# @param textbox [Selenium::WebDriver::Element, nil] the DM composer element;
#   the search walks up to 10 ancestors from it looking for a Send control
# @return [Hash] { clicked: Boolean, reason: String, aria_label?, outer_html_preview? }
#   — never raises; exceptions become { clicked: false, reason: "send_click_exception ..." }
def click_dm_send_button(driver, textbox: nil)
  return { clicked: false, reason: "no_textbox" } unless textbox
  # Mark the send button in-DOM so we can click it via WebDriver actions (more reliable than JS click).
  mark =
    driver.execute_script(<<~JS, textbox)
      const textbox = arguments[0];
      if (!textbox) return { marked: false, reason: "no_textbox" };

      // Clear previous marks (best effort).
      try {
        document.querySelectorAll("[data-codex-send-btn='1']").forEach((n) => n.removeAttribute("data-codex-send-btn"));
      } catch (e) {}

      const isVisible = (el) => {
        if (!el) return false;
        const style = window.getComputedStyle(el);
        if (style.display === "none" || style.visibility === "hidden" || style.opacity === "0") return false;
        const r = el.getBoundingClientRect();
        return (r.width > 0 && r.height > 0);
      };

      const selectors = [
        "[role='button'][aria-label='Send']",
        "[role='button'][aria-label*='Send']",
        "button[aria-label='Send']",
        "button[aria-label*='Send']",
        "svg[aria-label='Send']",
        "svg[aria-label*='Send']"
      ];

      let root = textbox;
      for (let depth = 0; depth < 10 && root; depth++) {
        let candidate = null;
        for (const sel of selectors) {
          const el = root.querySelector ? root.querySelector(sel) : null;
          if (el) { candidate = el; break; }
        }

        if (candidate) {
          let button = candidate;
          if (button && button.tagName && button.tagName.toLowerCase() === "svg") {
            button = button.closest("button,[role='button']") || button;
          }

          const preview = (button && button.outerHTML ? button.outerHTML : "").slice(0, 900);
          const ariaLabel = button && button.getAttribute ? button.getAttribute("aria-label") : null;
          if (!button) return { marked: false, reason: "send_button_null" };
          if (!isVisible(button)) return { marked: false, reason: "send_button_not_visible", ariaLabel, outerHTMLPreview: preview };

          try { button.setAttribute("data-codex-send-btn", "1"); } catch (e) {}
          return { marked: true, ariaLabel, outerHTMLPreview: preview };
        }

        root = root.parentElement;
      }

      return { marked: false, reason: "send_button_not_found_near_textbox" };
    JS

  # Selenium may hand back a driver-specific hash-like object; normalize it.
  mark = mark.to_h if mark.respond_to?(:to_h)
  return { clicked: false, reason: "unexpected_js_return: #{mark.class}" } unless mark.is_a?(Hash)

  mark = mark.transform_keys { |k| k.to_s.to_sym }
  return { clicked: false, reason: mark[:reason] || "send_button_not_marked", aria_label: mark[:ariaLabel], outer_html_preview: mark[:outerHTMLPreview] } unless mark[:marked]

  # Re-find the marked element through WebDriver so actions target a live node.
  el = driver.find_element(css: "[data-codex-send-btn='1']")
  begin
    driver.action.move_to(el).click.perform
  rescue StandardError
    js_click(driver, el)
  end

  # Clean up the mark to avoid confusing later steps.
  begin
    driver.execute_script("arguments[0].removeAttribute('data-codex-send-btn');", el)
  rescue StandardError
    nil
  end

  { clicked: true, reason: "clicked", aria_label: mark[:ariaLabel], outer_html_preview: mark[:outerHTMLPreview] }
rescue StandardError => e
  { clicked: false, reason: "send_click_exception #{e.class}: #{e.message}" }
end
-
-
# Parses DM conversation participants out of raw inbox page HTML by scanning
# the embedded Lightspeed payload segments.
#
# @param html [String, nil] full inbox page HTML
# @return [Array(Hash, Integer)] pair of
#   - users: { normalized_username => { display_name: String } }
#   - verify_segments: number of payload segments scanned
#   Never raises; on error returns whatever was accumulated so far.
def extract_conversation_users_from_inbox_html(html)
  users = {}
  verify_segments = 0

  return [users, verify_segments] if html.blank?

  # Extract from the Lightspeed payload embedded in the inbox page. Example structure:
  # ... "verifyContactRowExists", ... , "Display Name", ... , "username", [9], [9]]]
  #
  # We avoid DOM selectors here because the inbox is frequently rendered as role="button" rows
  # and the username often only appears inside embedded payloads.
  # In many builds the payload is itself a JSON-encoded string, so quotes appear as \"...\".
  # First try the escaped-quote form, then fall back to plain quotes.
  segments = html.scan(/\\\"verifyContactRowExists\\\"[\s\S]{0,4000}?\[9\],\s*\[9\]\]\]/)
  segments = html.scan(/"verifyContactRowExists"[\s\S]{0,4000}?\[9\],\s*\[9\]\]\]/) if segments.empty?
  verify_segments += segments.length

  segments.each do |segment|
    # Candidate usernames appear lowercase in this payload (usernames are case-insensitive but stored normalized).
    # Pick the quoting style per segment so tokens are extracted consistently.
    token_re =
      if segment.include?("\\\"")
        /\\\"([A-Za-z0-9._]{1,30})\\\"/
      else
        /"([A-Za-z0-9._]{1,30})"/
      end

    tokens = segment.scan(token_re).flatten
    # The username is taken as the LAST lowercase username-shaped token in the
    # segment (matches the observed payload ordering noted above).
    candidate_usernames = tokens.select { |t| t == t.downcase && t.match?(/\A[a-z0-9._]{1,30}\z/) }
    username = candidate_usernames.last.to_s
    next if username.blank?

    display_re =
      if segment.include?("\\\"")
        /\\\"([^\\\"]{1,80})\\\"/
      else
        /"([^"]{1,80})"/
      end

    # Display name: last quoted string that is NOT a URL, username-shaped token,
    # or a known payload keyword.
    display_candidates = segment.scan(display_re).flatten
    display = display_candidates.reverse.find do |t|
      next false if t.blank?
      next false if t.include?("/") || t.match?(%r{\Ahttps?://}i)
      next false if t.match?(/\A[a-z0-9._]{1,30}\z/) # likely a username token
      next false if t.match?(/\Amessaging\b/i) || t.match?(/\blightspeed\b/i) || t.match?(/\bmedia_fallback\b/i)
      true
    end

    # ||= keeps the first (earliest) occurrence for a given username.
    users[normalize_username(username)] ||= { display_name: display.presence || username }
  end

  [users, verify_segments]
rescue StandardError
  # Best effort: partial results are still useful to callers.
  [users, verify_segments]
end
-
-
# Persists the DM-availability state on +profile+. Best effort by design:
# persistence failures are logged and swallowed, never raised to the caller.
#
# @param profile        [#update!, nil] profile record; no-op when nil
# @param state          [String, Symbol] "messageable", "unknown", or anything
#   else (treated as not messageable)
# @param reason         [String, nil] restriction / interaction reason
# @param retry_after_at [Time, nil] when a restricted profile may be re-checked
# @return [void]
def mark_profile_dm_state!(profile:, state:, reason:, retry_after_at: nil)
  return unless profile

  # Tri-state mapping onto the can_message flag: true / nil (unknown) / false.
  can_message_value =
    case state.to_s
    when "messageable"
      true
    when "unknown"
      nil
    else
      false
    end

  payload = {
    can_message: can_message_value,
    # Only keep a restriction reason when the profile is NOT messageable.
    restriction_reason: can_message_value == true ? nil : reason.to_s.presence,
    dm_interaction_state: state.to_s.presence,
    dm_interaction_reason: reason.to_s.presence,
    dm_interaction_checked_at: Time.current,
    dm_interaction_retry_after_at: retry_after_at
  }
  profile.update!(payload)
rescue StandardError => e
  # Previously a bare `rescue -> nil` that silently discarded failures
  # (e.g. validation errors). Keep the best-effort contract but log it.
  Rails.logger.warn("mark_profile_dm_state! failed: #{e.class}: #{e.message}")
  nil
end
-
-
# Marks +profile+ as DM-unavailable after a failed send, deriving the retry
# backoff from the failure +result+ hash: an HTTP 403 from the API backs off
# for STORY_INTERACTION_RETRY_DAYS days, anything else retries after 12 hours.
def apply_dm_state_from_send_result(profile:, result:)
  return unless profile
  return unless result.is_a?(Hash)

  failure_reason = result[:reason].to_s.presence || "send_failed"
  backoff =
    if result[:api_http_status].to_i == 403
      STORY_INTERACTION_RETRY_DAYS.days
    else
      12.hours
    end

  mark_profile_dm_state!(
    profile: profile,
    state: "unavailable",
    reason: failure_reason,
    retry_after_at: Time.current + backoff
  )
end
-
end
-
end
-
end
-
module Instagram
-
class Client
-
module FeedEngagementService
-
# Captures "home feed" post identifiers that appear while scrolling.
-
#
-
# This does NOT auto-like or auto-comment. It only records posts, downloads media (temporarily),
-
# and queues analysis. Interaction should remain a user-confirmed action in the UI.
-
# @param rounds        [Integer] scroll/extract iterations (clamped 1..25)
# @param delay_seconds [Integer] sleep between rounds (clamped 10..120)
# @param max_new       [Integer] stop once this many new posts are recorded (clamped 1..200)
# @return [Hash] { seen_posts: Integer, new_posts: Integer }
def capture_home_feed_posts!(rounds: 4, delay_seconds: 45, max_new: 20)
  # Hoisted: the cap was previously recomputed on every break check.
  new_post_cap = max_new.to_i.clamp(1, 200)

  with_recoverable_session(label: "feed_capture") do
    with_authenticated_driver do |driver|
      with_task_capture(driver: driver, task_name: "feed_capture_home", meta: { rounds: rounds, delay_seconds: delay_seconds, max_new: max_new }) do
        driver.navigate.to(INSTAGRAM_BASE_URL)
        wait_for(driver, css: "body", timeout: 12)
        dismiss_common_overlays!(driver)

        seen = 0
        new_posts = 0

        rounds.to_i.clamp(1, 25).times do |i|
          dismiss_common_overlays!(driver)

          items = extract_feed_items_from_dom(driver)
          now = Time.current

          items.each do |it|
            sc = it[:shortcode].to_s.strip
            next if sc.blank?

            seen += 1

            post = @account.instagram_posts.find_or_initialize_by(shortcode: sc)
            is_new = post.new_record?

            # Only fill blanks / newer data; never clobber existing values with nil.
            post.detected_at ||= now
            post.post_kind = it[:post_kind].presence || post.post_kind.presence || "unknown"
            post.author_username = it[:author_username].presence || post.author_username
            post.media_url = it[:media_url].presence || post.media_url
            post.caption = it[:caption].presence || post.caption
            post.metadata = (post.metadata || {}).merge(it[:metadata] || {}).merge(round: i + 1)
            post.save! if post.changed?

            if is_new
              new_posts += 1

              # Download media and analyze (best effort).
              DownloadInstagramPostMediaJob.perform_later(instagram_post_id: post.id) if post.media_url.present?
              AnalyzeInstagramPostJob.perform_later(instagram_post_id: post.id)
            end

            break if new_posts >= new_post_cap
          end

          break if new_posts >= new_post_cap

          # Scroll down a bit.
          driver.execute_script("window.scrollBy(0, Math.max(700, window.innerHeight * 0.85));")
          sleep(delay_seconds.to_i.clamp(10, 120))
        end

        { seen_posts: seen, new_posts: new_posts }
      end
    end
  end
end
-
# Full Selenium automation flow:
-
# - navigate home feed
-
# - optionally engage one story first (hold/freeze until reply)
-
# - find image posts, download media, store profile history, analyze, generate comment, post first suggestion
-
# - capture HTML/JSON/screenshot artifacts at each step
-
def auto_engage_home_feed!(max_posts: 3, include_story: true, story_hold_seconds: 18)
  # @param max_posts          [Integer] posts to process (clamped 1..10)
  # @param include_story      [Boolean-ish] cast via ActiveModel::Type::Boolean
  # @param story_hold_seconds [Integer] story hold duration (clamped 8..40)
  # @return [Hash] { story_replied:, posts_commented:, posts_processed:, details: }
  max_posts_i = max_posts.to_i.clamp(1, 10)
  include_story_bool = ActiveModel::Type::Boolean.new.cast(include_story)
  hold_seconds_i = story_hold_seconds.to_i.clamp(8, 40)

  with_recoverable_session(label: "auto_engage_home_feed") do
    with_authenticated_driver do |driver|
      with_task_capture(
        driver: driver,
        task_name: "auto_engage_home_feed_start",
        meta: { max_posts: max_posts_i, include_story: include_story_bool, story_hold_seconds: hold_seconds_i }
      ) do
        driver.navigate.to(INSTAGRAM_BASE_URL)
        wait_for(driver, css: "body", timeout: 12)
        dismiss_common_overlays!(driver)
        capture_task_html(driver: driver, task_name: "auto_engage_home_loaded", status: "ok")

        # Optional story engagement happens first, before post processing.
        story_result =
          if include_story_bool
            auto_engage_first_story!(driver: driver, story_hold_seconds: hold_seconds_i)
          else
            { attempted: false, replied: false }
          end

        # Return to the home feed — the story flow may have navigated away.
        driver.navigate.to(INSTAGRAM_BASE_URL)
        wait_for(driver, css: "body", timeout: 12)
        dismiss_common_overlays!(driver)
        sleep(0.6)
        capture_task_html(driver: driver, task_name: "auto_engage_home_before_posts", status: "ok")

        # Only plain image/video posts with a resolvable shortcode and an
        # absolute media URL are candidates (reels/stories are excluded here).
        feed_items = extract_feed_items_from_dom(driver).select do |item|
          item[:post_kind] == "post" &&
            item[:shortcode].to_s.present? &&
            item[:media_url].to_s.start_with?("http://", "https://")
        end
        capture_task_html(
          driver: driver,
          task_name: "auto_engage_posts_discovered",
          status: "ok",
          meta: { discovered_posts: feed_items.length, max_posts: max_posts_i }
        )

        processed = 0
        commented = 0
        details = []

        feed_items.each do |item|
          break if processed >= max_posts_i
          processed += 1

          # Per-post failures are recorded in details and do not abort the run.
          begin
            result = auto_engage_feed_post!(driver: driver, item: item)
            details << result
            commented += 1 if result[:comment_posted] == true
          rescue StandardError => e
            details << {
              shortcode: item[:shortcode],
              username: item[:author_username],
              comment_posted: false,
              error: e.message.to_s
            }
          end
        end

        {
          story_replied: story_result[:replied] == true,
          posts_commented: commented,
          posts_processed: processed,
          details: details
        }
      end
    end
  end
end
-
-
end
-
end
-
end
-
module Instagram
-
class Client
-
module FeedFetchingService
-
# Fetches a profile's feed items for analysis, preferring the direct HTTP API
# and falling back to an in-browser fetch when the API yields no items.
# When both come back empty, the HTTP result is returned annotated with the
# browser fallback outcome so callers can see what was attempted.
def fetch_profile_feed_items_for_analysis(username:, user_id:, posts_limit:)
  via_http = fetch_profile_feed_items_via_http(
    username: username,
    user_id: user_id,
    posts_limit: posts_limit
  )
  return via_http unless Array(via_http[:items]).empty?

  via_browser = fetch_profile_feed_items_via_browser_context(
    username: username,
    user_id_hint: user_id,
    posts_limit: posts_limit
  )
  return via_browser unless Array(via_browser[:items]).empty?

  via_http.merge(
    browser_fallback_attempted: true,
    browser_fallback_error: via_browser[:error].to_s.presence
  )
end
-
-
# Pages through the user-feed HTTP API collecting up to +posts_limit+ items
# (unlimited when nil/non-positive, capped by PROFILE_FEED_MAX_PAGES pages).
#
# @param username    [String] used as the Referer for the feed requests
# @param user_id     [String, Integer] required; returns an empty result when blank
# @param posts_limit [Integer, nil] max items to return
# @return [Hash] { source:, user_id:, pages_fetched:, final_max_id:, more_available:, items: }
#   — on error: { source:, user_id:, pages_fetched: 0, error:, items: [] }
def fetch_profile_feed_items_via_http(username:, user_id:, posts_limit:)
  limit = posts_limit.to_i if posts_limit.present?
  limit = nil if limit.to_i <= 0
  return { source: "http_feed_api", user_id: nil, pages_fetched: 0, items: [] } if user_id.to_s.blank?

  remaining = limit
  max_id = nil
  pages = 0
  items = []
  seen_max_ids = Set.new
  seen_item_keys = Set.new
  more_available = false

  loop do
    break if pages >= PROFILE_FEED_MAX_PAGES
    break if remaining.present? && remaining <= 0
    # Guard against cursor loops: stop if the API hands back a cursor we've seen.
    break if max_id.present? && seen_max_ids.include?(max_id)

    seen_max_ids << max_id if max_id.present?
    # Request only what we still need, bounded by the page size.
    count = remaining.present? ? [remaining, PROFILE_FEED_PAGE_SIZE].min : PROFILE_FEED_PAGE_SIZE
    feed = fetch_user_feed(user_id: user_id, referer_username: username, count: count, max_id: max_id)
    break unless feed.is_a?(Hash)

    page_items = Array(feed["items"]).select { |item| item.is_a?(Hash) }
    break if page_items.empty?

    pages += 1
    deduped = dedupe_profile_feed_items(items: page_items, seen_keys: seen_item_keys, max_items: remaining)
    items.concat(deduped)
    remaining -= deduped.length if remaining.present?

    next_max_id = feed["next_max_id"].to_s.strip.presence
    more_available = ActiveModel::Type::Boolean.new.cast(feed["more_available"])
    max_id = next_max_id
    break if max_id.blank?
  end

  {
    source: "http_feed_api",
    user_id: user_id.to_s,
    pages_fetched: pages,
    final_max_id: max_id,
    more_available: more_available,
    items: limit.present? ? items.first(limit) : items
  }
rescue StandardError => e
  {
    source: "http_feed_api",
    user_id: user_id.to_s.presence,
    pages_fetched: 0,
    error: e.message.to_s,
    items: []
  }
end
-
-
# Fetches profile feed items from INSIDE the authenticated browser session:
# navigates to the profile page and pages the feed API via in-page fetch()
# (so the browser's own cookies/headers are used). Used as a fallback when the
# direct HTTP path returns nothing.
#
# @param username     [String] profile to fetch
# @param user_id_hint [String, nil] skips the web_profile_info lookup when given
# @param posts_limit  [Integer, nil] max items; nil caps at PROFILE_FEED_BROWSER_ITEM_CAP
# @return [Hash] { source:, user_id:, pages_fetched:, final_max_id:, error:, items: }
def fetch_profile_feed_items_via_browser_context(username:, user_id_hint:, posts_limit:)
  limit = posts_limit.to_i if posts_limit.present?
  limit = nil if limit.to_i <= 0
  max_items = limit.present? ? limit : PROFILE_FEED_BROWSER_ITEM_CAP

  with_recoverable_session(label: "profile_analysis_posts_browser_fallback") do
    with_authenticated_driver do |driver|
      driver.navigate.to("#{INSTAGRAM_BASE_URL}/#{username}/")
      wait_for(driver, css: "body", timeout: 10)
      dismiss_common_overlays!(driver)

      # Async script: the final callback argument (done) delivers the result
      # back to execute_async_script.
      payload =
        driver.execute_async_script(
          <<~JS,
            const username = String(arguments[0] || "").trim();
            const userIdHint = String(arguments[1] || "").trim();
            const maxItems = Math.max(1, Number(arguments[2] || 0));
            const pageSize = Math.max(1, Number(arguments[3] || 30));
            const maxPages = Math.max(1, Number(arguments[4] || 100));
            const done = arguments[arguments.length - 1];

            const out = {
              source: "browser_feed_api",
              user_id: null,
              pages_fetched: 0,
              final_max_id: null,
              items: [],
              error: null
            };

            const readJson = async (path) => {
              const resp = await fetch(path, {
                method: "GET",
                credentials: "include",
                headers: {
                  "Accept": "application/json, text/plain, */*",
                  "X-Requested-With": "XMLHttpRequest"
                }
              });
              if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${path}`);
              return await resp.json();
            };

            (async () => {
              try {
                let userId = userIdHint;
                if (!userId) {
                  const profile = await readJson(`/api/v1/users/web_profile_info/?username=${encodeURIComponent(username)}`);
                  userId = String((profile && profile.data && profile.data.user && profile.data.user.id) || "").trim();
                }
                if (!userId) {
                  out.error = "browser_profile_user_id_missing";
                  done(out);
                  return;
                }

                out.user_id = userId;
                let maxId = "";
                let remaining = maxItems;
                const seenCursors = new Set();

                for (let page = 0; page < maxPages; page += 1) {
                  if (remaining <= 0) break;
                  if (maxId && seenCursors.has(maxId)) break;
                  if (maxId) seenCursors.add(maxId);

                  const count = Math.min(pageSize, remaining);
                  const query = new URLSearchParams({ count: String(count) });
                  if (maxId) query.set("max_id", maxId);
                  const feed = await readJson(`/api/v1/feed/user/${encodeURIComponent(userId)}/?${query.toString()}`);
                  const pageItems = Array.isArray(feed && feed.items) ? feed.items : [];
                  if (pageItems.length === 0) break;

                  out.items.push(...pageItems);
                  out.pages_fetched += 1;
                  remaining -= pageItems.length;

                  const nextMaxId = String((feed && feed.next_max_id) || "").trim();
                  if (!nextMaxId || nextMaxId === maxId) {
                    maxId = nextMaxId;
                    break;
                  }
                  maxId = nextMaxId;
                }

                out.final_max_id = maxId || null;
              } catch (error) {
                out.error = String((error && error.message) || error || "browser_feed_fetch_failed");
              }
              done(out);
            })();
          JS
          username.to_s,
          user_id_hint.to_s,
          max_items,
          PROFILE_FEED_PAGE_SIZE,
          PROFILE_FEED_MAX_PAGES
        )

      # Normalize and dedupe the raw JS payload on the Ruby side.
      payload_hash = payload.is_a?(Hash) ? payload : {}
      seen_item_keys = Set.new
      deduped = dedupe_profile_feed_items(
        items: Array(payload_hash["items"]),
        seen_keys: seen_item_keys,
        max_items: limit
      )

      {
        source: payload_hash["source"].to_s.presence || "browser_feed_api",
        user_id: payload_hash["user_id"].to_s.presence,
        pages_fetched: payload_hash["pages_fetched"].to_i,
        final_max_id: payload_hash["final_max_id"].to_s.presence,
        error: payload_hash["error"].to_s.presence,
        items: deduped
      }
    end
  end
rescue StandardError => e
  {
    source: "browser_feed_api",
    user_id: user_id_hint.to_s.presence,
    pages_fetched: 0,
    error: e.message.to_s,
    items: []
  }
end
-
-
# Extracts the most recent post's timestamp and shortcode from raw profile
# page HTML by scanning (a window around) the embedded timeline-media payload.
#
# @param html [String, nil] full profile page HTML
# @return [Hash] { taken_at: Time (UTC) or nil, shortcode: String or nil } — never raises
def extract_latest_post_from_profile_html(html)
  html = html.to_s
  return { taken_at: nil, shortcode: nil } if html.strip.empty?

  # Prefer restricting our search to a window around the timeline media payload to avoid
  # grabbing unrelated timestamps elsewhere in the page.
  idx = html.index("edge_owner_to_timeline_media") || html.index("timeline_media")
  window = idx ? (html.byteslice(idx, 250_000) || "") : html

  taken_at = nil
  shortcode = nil

  if (m = window.match(/"taken_at_timestamp":(\d{9,})/))
    # Explicit begin/rescue instead of the previous `rescue nil` modifier,
    # which would also have hidden unrelated errors on the same line.
    begin
      taken_at = Time.at(m[1].to_i).utc
    rescue StandardError
      taken_at = nil
    end
  end

  if (m = window.match(/"shortcode":"([A-Za-z0-9_-]{5,})"/))
    shortcode = m[1].to_s
  end

  { taken_at: taken_at, shortcode: shortcode }
rescue StandardError
  { taken_at: nil, shortcode: nil }
end
-
-
# DOM-based fallback for finding a profile's most recent post: waits for the
# post grid to hydrate, clicks the first visible /p/ or /reel/ link, and reads
# the <time datetime> attribute from the opened post modal.
#
# @param driver [Selenium::WebDriver::Driver] already on the profile page
# @return [Hash] { taken_at: Time (UTC) or nil, shortcode: String or nil }
def extract_latest_post_from_profile_dom(driver)
  with_task_capture(driver: driver, task_name: "profile_latest_post_dom") do
    begin
      wait_for(driver, css: "body", timeout: 6)
      dismiss_common_overlays!(driver)

      # Wait for the grid to hydrate (Instagram often renders posts after JS loads).
      begin
        Selenium::WebDriver::Wait.new(timeout: 12).until do
          driver.find_elements(css: "article a[href^='/p/'], article a[href^='/reel/']").any? ||
            driver.page_source.to_s.include?("No posts yet") ||
            driver.page_source.to_s.include?("This Account is Private")
        end
      rescue Selenium::WebDriver::Error::TimeoutError
        nil
      end

      # Prefer links inside <article>; fall back to any matching link on the page.
      link =
        driver.find_elements(css: "article a[href^='/p/']").find(&:displayed?) ||
        driver.find_elements(css: "article a[href^='/reel/']").find(&:displayed?) ||
        driver.find_elements(css: "a[href^='/p/']").find(&:displayed?) ||
        driver.find_elements(css: "a[href^='/reel/']").find(&:displayed?)

      unless link
        # `next` returns this value from the with_task_capture block.
        next({ taken_at: nil, shortcode: nil })
      end

      href = link.attribute("href").to_s
      shortcode =
        if href.include?("/p/")
          href.split("/p/").last.to_s.split("/").first.to_s
        elsif href.include?("/reel/")
          href.split("/reel/").last.to_s.split("/").first.to_s
        end

      driver.execute_script("arguments[0].click()", link)

      time_el = wait_for(driver, css: "time[datetime]", timeout: 10)
      dt = time_el.attribute("datetime").to_s
      taken_at =
        begin
          Time.iso8601(dt).utc
        rescue StandardError
          Time.parse(dt).utc
        end

      # Close the post modal (best effort) so the driver is reusable.
      begin
        driver.action.send_keys(:escape).perform
      rescue StandardError
        nil
      end

      { taken_at: taken_at, shortcode: shortcode.presence }
    rescue Selenium::WebDriver::Error::TimeoutError
      { taken_at: nil, shortcode: nil }
    rescue StandardError
      { taken_at: nil, shortcode: nil }
    end
  end
end
-
-
# Resolves the latest post (UTC timestamp + shortcode) for +username+ over
# HTTP: first from the web profile info payload's timeline edges, then — when
# those are empty — from the user feed endpoint.
def extract_latest_post_from_profile_http(username)
  empty_result = { taken_at: nil, shortcode: nil }

  username = normalize_username(username)
  return empty_result if username.blank?

  data = fetch_web_profile_info(username)
  return empty_result unless data.is_a?(Hash)

  user = data.dig("data", "user")
  return empty_result unless user.is_a?(Hash)

  node =
    user.dig("edge_owner_to_timeline_media", "edges")&.first&.dig("node") ||
    user.dig("edge_felix_video_timeline", "edges")&.first&.dig("node")

  if node.is_a?(Hash)
    raw_ts = node["taken_at_timestamp"] || node["taken_at"] || node["taken_at_time"]
    timestamp =
      begin
        raw_ts.present? ? Time.at(raw_ts.to_i).utc : nil
      rescue StandardError
        nil
      end
    return { taken_at: timestamp, shortcode: node["shortcode"].to_s.strip.presence }
  end

  # Fallback: fetch the user's feed items (this endpoint still works on builds where timeline edges are empty).
  user_id = user["id"].to_s.strip
  return empty_result if user_id.blank?

  feed = fetch_user_feed(user_id: user_id, referer_username: username, count: 6)
  item = feed.is_a?(Hash) ? Array(feed["items"]).first : nil
  return empty_result unless item.is_a?(Hash)

  timestamp =
    begin
      raw_ts = item["taken_at"]
      raw_ts.present? ? Time.at(raw_ts.to_i).utc : nil
    rescue StandardError
      nil
    end

  { taken_at: timestamp, shortcode: (item["code"] || item["shortcode"]).to_s.strip.presence }
rescue StandardError
  { taken_at: nil, shortcode: nil }
end
-
-
# Collects home-feed post descriptors. Prefers the timeline API; falls back to
# scraping /p/ and /reel/ links from the rendered DOM.
#
# @param driver [Selenium::WebDriver::Driver]
# @return [Array<Hash>] items with keys :shortcode, :post_kind, :author_username,
#   :media_url, :caption, :metadata — [] on any error
def extract_feed_items_from_dom(driver)
  api_items = fetch_home_feed_items_via_api(limit: 50)
  return api_items if api_items.present?

  # Instagram feed markup changes a lot. We rely on robust link patterns (/p/ and /reel/).
  driver.execute_script(<<~JS)
    const out = [];
    const uniq = new Set();

    const linkEls = Array.from(document.querySelectorAll("a[href^='/p/'], a[href^='/reel/']"));
    for (const a of linkEls) {
      const href = (a.getAttribute("href") || "").trim();
      if (!href) continue;
      const parts = href.split("/");
      // /p/<shortcode>/...
      const idx = parts.findIndex((p) => p === "p" || p === "reel");
      if (idx < 0 || !parts[idx + 1]) continue;
      const kind = parts[idx];
      const shortcode = parts[idx + 1];
      if (!shortcode || uniq.has(shortcode)) continue;

      uniq.add(shortcode);

      // Try to find a nearby article container for metadata.
      let node = a;
      for (let j = 0; j < 8; j++) {
        if (!node) break;
        if (node.tagName && node.tagName.toLowerCase() === "article") break;
        node = node.parentElement;
      }
      const container = node && node.tagName && node.tagName.toLowerCase() === "article" ? node : a.closest("article") || a.parentElement;

      // Author username: attempt to find a link that looks like /username/
      let author = null;
      if (container) {
        const authorLink = Array.from(container.querySelectorAll("a[href^='/']")).find((x) => {
          const h = (x.getAttribute("href") || "").trim();
          if (!h) return false;
          if (h.startsWith("/p/") || h.startsWith("/reel/") || h.startsWith("/stories/") || h.startsWith("/explore/") || h.startsWith("/direct/")) return false;
          const seg = h.split("/").filter(Boolean)[0];
          return seg && seg.length <= 30 && /^[A-Za-z0-9._]+$/.test(seg);
        });
        if (authorLink) {
          const h = (authorLink.getAttribute("href") || "").trim();
          author = h.split("/").filter(Boolean)[0] || null;
        }
      }

      // Media URL: prefer the first visible img.
      let mediaUrl = null;
      let naturalWidth = null;
      let naturalHeight = null;
      if (container) {
        const img = Array.from(container.querySelectorAll("img")).find((img) => {
          const r = img.getBoundingClientRect();
          return r.width > 80 && r.height > 80;
        });
        if (img) {
          mediaUrl = img.currentSrc || img.getAttribute("src") || null;
          naturalWidth = Number(img.naturalWidth || 0) || null;
          naturalHeight = Number(img.naturalHeight || 0) || null;
        }
      }

      out.push({
        shortcode,
        post_kind: kind === "reel" ? "reel" : "post",
        author_username: author,
        media_url: mediaUrl,
        caption: null,
        metadata: { href, natural_width: naturalWidth, natural_height: naturalHeight }
      });
    }

    return out.slice(0, 60);
  JS
    .map do |h|
    # Convert the JS string-keyed hashes into the symbol-keyed shape used app-wide.
    {
      shortcode: h["shortcode"],
      post_kind: h["post_kind"],
      author_username: normalize_username(h["author_username"].to_s),
      media_url: h["media_url"].to_s,
      caption: h["caption"],
      metadata: h["metadata"] || {}
    }
  end
rescue StandardError
  []
end
-
-
# Filters +items+ down to hashes whose identity key has not been seen before,
# recording each new key in +seen_keys+ as it goes, and stopping once
# +max_items+ results have been collected. The identity key is the first
# present of pk / id / code / shortcode, else a SHA256 of the item JSON.
def dedupe_profile_feed_items(items:, seen_keys:, max_items: nil)
  unique = []

  Array(items).each do |raw|
    next unless raw.is_a?(Hash)

    identity =
      raw["pk"].to_s.presence ||
      raw["id"].to_s.presence ||
      raw["code"].to_s.presence ||
      raw["shortcode"].to_s.presence ||
      Digest::SHA256.hexdigest(raw.to_json)
    next if identity.blank? || seen_keys.include?(identity)

    seen_keys << identity
    unique << raw
    break if max_items.present? && unique.length >= max_items.to_i
  end

  unique
end
-
-
# Performs a direct (non-browser) GET against the user-feed API using the
# account's stored cookies, user agent, and CSRF token.
#
# @param user_id          [String, Integer] Instagram numeric user id
# @param referer_username [String] used to build the Referer header
# @param count            [Integer] page size (clamped 1..30)
# @param max_id           [String, nil] pagination cursor
# @return [Hash, nil] parsed JSON body, or nil on any non-2xx / parse / network error
def fetch_user_feed(user_id:, referer_username:, count:, max_id: nil)
  q = [ "count=#{count.to_i.clamp(1, 30)}" ]
  q << "max_id=#{CGI.escape(max_id.to_s)}" if max_id.present?
  uri = URI.parse("#{INSTAGRAM_BASE_URL}/api/v1/feed/user/#{user_id}/?#{q.join('&')}")

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 10
  http.read_timeout = 20

  # Mimic the web client's XHR headers so the API accepts the request.
  req = Net::HTTP::Get.new(uri.request_uri)
  req["User-Agent"] = @account.user_agent.presence || "Mozilla/5.0"
  req["Accept"] = "application/json, text/plain, */*"
  req["X-Requested-With"] = "XMLHttpRequest"
  # Falls back to the well-known web app id when the account snapshot lacks one.
  req["X-IG-App-ID"] = (@account.auth_snapshot.dig("ig_app_id").presence || "936619743392459")
  req["Referer"] = "#{INSTAGRAM_BASE_URL}/#{referer_username}/"

  csrf = @account.cookies.find { |c| c["name"].to_s == "csrftoken" }&.dig("value").to_s
  req["X-CSRFToken"] = csrf if csrf.present?
  req["Cookie"] = cookie_header_for(@account.cookies)

  res = http.request(req)
  return nil unless res.is_a?(Net::HTTPSuccess)

  JSON.parse(res.body.to_s)
rescue StandardError
  # Best effort: callers treat nil as "feed unavailable".
  nil
end
-
-
# Pulls up to +limit+ (clamped 1..60) items from the timeline API endpoint and
# maps them into feed-item hashes; returns [] on any failure so callers can
# fall through to DOM scraping.
def fetch_home_feed_items_via_api(limit: 50)
  wanted = limit.to_i.clamp(1, 60)
  body = ig_api_get_json(path: "/api/v1/feed/timeline/?count=#{wanted}", referer: INSTAGRAM_BASE_URL)
  return [] unless body.is_a?(Hash)

  # Newer payloads often use feed_items with nested media_or_ad.
  wrapped = Array(body["feed_items"])
  raw_items =
    if wrapped.empty?
      Array(body["items"])
    else
      wrapped.filter_map { |entry| entry.is_a?(Hash) ? (entry["media_or_ad"] || entry["media"]) : nil }
    end

  raw_items.filter_map { |item| extract_home_feed_item_from_api(item) }.first(wanted)
rescue StandardError
  []
end
-
-
# Normalizes a single raw timeline-API media hash into the app's feed-item
# shape. For carousels (media_type 8), prefers a video child, then an image
# child, then the first entry.
#
# @param item [Hash] raw API media payload (anything else returns nil)
# @return [Hash, nil] { shortcode:, post_kind:, author_username:, media_url:,
#   caption:, metadata: } — nil when the shortcode is missing or on error
def extract_home_feed_item_from_api(item)
  return nil unless item.is_a?(Hash)

  shortcode = (item["code"] || item["shortcode"]).to_s.strip
  return nil if shortcode.blank?

  media_type = item["media_type"].to_i
  product_type = item["product_type"].to_s.downcase
  # NOTE(review): media_type 2 appears to mean video and 8 carousel here —
  # inferred from the selection logic below; confirm against API docs.
  post_kind = product_type.include?("clips") ? "reel" : "post"
  post_kind = "post" if post_kind.blank?

  image_candidate =
    if media_type == 8
      carousel = Array(item["carousel_media"]).select { |m| m.is_a?(Hash) }
      chosen = carousel.find { |m| m["media_type"].to_i == 2 } || carousel.find { |m| m["media_type"].to_i == 1 } || carousel.first
      chosen&.dig("image_versions2", "candidates", 0)
    else
      item.dig("image_versions2", "candidates", 0)
    end
  video_candidate =
    if media_type == 8
      carousel = Array(item["carousel_media"]).select { |m| m.is_a?(Hash) }
      chosen = carousel.find { |m| m["media_type"].to_i == 2 } || carousel.first
      Array(chosen&.dig("video_versions")).first
    else
      Array(item["video_versions"]).first
    end

  # URLs arrive HTML-escaped in some payloads; unescape before use.
  image_url = CGI.unescapeHTML(image_candidate&.dig("url").to_s).strip.presence
  video_url = CGI.unescapeHTML(video_candidate&.dig("url").to_s).strip.presence
  width = image_candidate&.dig("width")
  height = image_candidate&.dig("height")

  {
    shortcode: shortcode,
    post_kind: post_kind,
    author_username: normalize_username(item.dig("user", "username").to_s),
    # Prefer the video URL when both exist; both are preserved in metadata.
    media_url: (video_url.presence || image_url).to_s,
    caption: item.dig("caption", "text").to_s.presence,
    metadata: {
      source: "api_timeline",
      media_id: (item["pk"] || item["id"]).to_s.presence,
      media_type: media_type,
      media_url_image: image_url.to_s.presence,
      media_url_video: video_url.to_s.presence,
      product_type: product_type,
      natural_width: width,
      natural_height: height
    }
  }
rescue StandardError
  nil
end
-
end
-
end
-
end
-
module Instagram
-
class Client
-
module FollowGraphFetchingService
-
# Delegates the full follow-graph sync to SyncFollowGraphService, handing it
# the account plus bound references to this client's session and collection
# helpers so the service stays decoupled from the client class.
def sync_follow_graph!
  collaborators = {
    account: @account,
    with_recoverable_session: method(:with_recoverable_session),
    with_authenticated_driver: method(:with_authenticated_driver),
    collect_conversation_users: method(:collect_conversation_users),
    collect_story_users: method(:collect_story_users),
    collect_follow_list: method(:collect_follow_list),
    upsert_follow_list: method(:upsert_follow_list!)
  }
  SyncFollowGraphService.new(**collaborators).call
end
-
-
# Returns mutual friends for +profile_username+ via the API, capping the
# request at 100. Any failure is logged and converted into an empty list.
def fetch_mutual_friends(profile_username:, limit: 36)
  capped = limit.to_i.clamp(1, 100)
  fetch_mutual_friends_via_api(profile_username: profile_username, limit: capped)
rescue StandardError => e
  Rails.logger.warn("Instagram fetch_mutual_friends failed for #{profile_username}: #{e.class}: #{e.message}")
  []
end
-
-
# Collects the full followers/following list for +profile_username+.
#
# Strategy: try the private web API first (fast, reliable); if it yields
# nothing, fall back to driving the browser: open the list modal (directly
# via URL, or by clicking the profile's followers/following link), then
# repeatedly scroll the modal while harvesting profile-row anchors until
# the list stops growing or the scroller reaches its end.
#
# driver           - an authenticated Selenium WebDriver.
# list_kind:       - :followers or :following.
# profile_username: - the profile whose list is being collected.
#
# Returns a Hash of {username => {display_name:, profile_pic_url:}}.
# Raises when the fallback cannot locate the list link on the profile page.
# Progress/diagnostic details are accumulated into +meta+ for task capture.
def collect_follow_list(driver, list_kind:, profile_username:)
  meta = { list_kind: list_kind.to_s, profile_username: profile_username }

  with_task_capture(driver: driver, task_name: "sync_collect_#{list_kind}", meta: meta) do
    # Fast path: friendships API. Non-empty result short-circuits the browser work.
    api_users = fetch_follow_list_via_api(profile_username: profile_username, list_kind: list_kind)
    if api_users.present?
      meta[:source] = "api_friendships"
      meta[:unique_usernames] = api_users.length
      return api_users
    end

    meta[:source] = "html_fallback"
    list_path = (list_kind == :followers) ? "followers" : "following"
    list_url = "#{INSTAGRAM_BASE_URL}/#{profile_username}/#{list_path}/"
    profile_url = "#{INSTAGRAM_BASE_URL}/#{profile_username}/"

    meta[:list_url] = list_url
    meta[:profile_url] = profile_url

    # Attempt 1: navigate straight to the /followers/ (or /following/) route,
    # which normally opens the list modal. nil when the dialog never appears.
    dialog =
      begin
        meta[:open_strategy] = "direct_url"
        driver.navigate.to(list_url)
        wait_for(driver, css: "body", timeout: 12)
        dismiss_common_overlays!(driver)
        wait_for(driver, css: "div[role='dialog']", timeout: 12)
      rescue Selenium::WebDriver::Error::TimeoutError
        nil
      end

    unless dialog
      # Fallback for builds that don't open the modal on the /followers/ route until after profile renders.
      meta[:open_strategy] = "profile_click_fallback"
      driver.navigate.to(profile_url)
      wait_for(driver, css: "body", timeout: 12)
      dismiss_common_overlays!(driver)

      href_fragment = "/#{list_path}/"

      # Some profiles render counts lazily; wait briefly for the link to appear.
      begin
        Selenium::WebDriver::Wait.new(timeout: 12).until do
          driver.execute_script(<<~JS, href_fragment)
            const frag = arguments[0];
            const els = Array.from(document.querySelectorAll("a[href]"));
            return els.some((a) => (a.getAttribute("href") || "").includes(frag));
          JS
        end
      rescue Selenium::WebDriver::Error::TimeoutError
        nil
      end

      # Click the followers/following link via JS, preferring a visible
      # candidate and falling back to a synthetic MouseEvent if .click() throws.
      clicked = false
      attempts = 0
      8.times do
        attempts += 1
        begin
          clicked = driver.execute_script(<<~JS, href_fragment)
            const frag = arguments[0];
            const candidates = Array.from(document.querySelectorAll(`a[href*="${frag}"]`));
            if (!candidates.length) return false;

            const isVisible = (el) => {
              const r = el.getBoundingClientRect();
              const cs = window.getComputedStyle(el);
              return cs && cs.visibility !== "hidden" && cs.display !== "none" && r.width > 0 && r.height > 0;
            };

            const link = candidates.find(isVisible) || candidates[0];
            try { link.scrollIntoView({block: "center", inline: "nearest"}); } catch (e) {}
            try { link.click(); return true; } catch (e) {}
            try { link.dispatchEvent(new MouseEvent("click", {bubbles: true, cancelable: true, view: window})); return true; } catch (e) {}
            return false;
          JS
        rescue Selenium::WebDriver::Error::StaleElementReferenceError,
               Selenium::WebDriver::Error::JavascriptError,
               Selenium::WebDriver::Error::ElementClickInterceptedError,
               Selenium::WebDriver::Error::ElementNotInteractableError
          clicked = false
        end

        break if clicked
        sleep(0.35)
      end
      meta[:profile_link_click_attempts] = attempts

      raise "Unable to find #{list_kind} link on profile" unless clicked

      dialog = wait_for(driver, css: "div[role='dialog']", timeout: 12)
    end

    # Record the expected list size (from the profile header counts) so the
    # scroll loop can stop as soon as it has collected enough usernames.
    if (counts = extract_profile_follow_counts(driver.page_source.to_s))
      meta[:expected_followers] = counts[:followers]
      meta[:expected_following] = counts[:following]
      meta[:expected_count] = (list_kind == :followers) ? counts[:followers] : counts[:following]
    end

    # The dialog often opens in a skeleton/loading state; if we start extracting immediately we'll
    # see 0 usernames and prematurely terminate. Wait briefly for at least one profile row anchor.
    begin
      Selenium::WebDriver::Wait.new(timeout: 20).until do
        driver.execute_script(<<~'JS')
          const dialog = document.querySelector("div[role='dialog']");
          if (!dialog) return false;
          const anchors = Array.from(dialog.querySelectorAll("a[href^='/']"));
          return anchors.some((a) => {
            const href = (a.getAttribute("href") || "").trim();
            return /^\/[A-Za-z0-9._]{1,30}\/(?:\?.*)?$/.test(href);
          });
        JS
      end
    rescue Selenium::WebDriver::Error::TimeoutError
      # We'll still attempt extraction; capture will show the loading state HTML.
    end

    # Scroll/harvest loop state:
    #   stable_rounds - consecutive rounds with no new usernames
    #   stuck_rounds  - consecutive rounds where scrollTop did not advance
    users = {}
    stable_rounds = 0
    last_count = 0
    stuck_rounds = 0
    last_scroll_top = nil

    # :following lists tend to be larger/slower, so allow more rounds.
    max_rounds = (list_kind == :following) ? 750 : 260

    max_rounds.times do
      # One round: harvest all visible profile rows from the dialog, then
      # advance the best-guess scroll container; returns harvest + scroll telemetry.
      payload = driver.execute_script(<<~'JS')
        const dialog = document.querySelector("div[role='dialog']");
        if (!dialog) return { out: [], scrolled: false, dialog_found: false };

        const out = [];
        const anchors = Array.from(dialog.querySelectorAll("a[href^='/']"));
        for (const a of anchors) {
          const href = (a.getAttribute("href") || "").trim();
          const m = href.match(/^\/([A-Za-z0-9._]{1,30})\/(?:\?.*)?$/);
          if (!m) continue;

          const username = (m[1] || "").toLowerCase();
          if (!username) continue;

          // Exclude common non-profile routes that can appear in dialogs.
          const reserved = new Set(["accounts","explore","direct","p","reel","reels","stories","about","privacy","terms"]);
          if (reserved.has(username)) continue;

          const row = a.closest("div");
          const img = row ? row.querySelector("img") : null;
          const pic = img ? (img.getAttribute("src") || "") : "";
          const alt = img ? (img.getAttribute("alt") || "") : "";

          // Display name is often in a sibling span; best-effort only.
          let display = "";
          if (row) {
            const spans = Array.from(row.querySelectorAll("span")).map((s) => (s.textContent || "").trim()).filter(Boolean);
            // Username is typically present; choose a non-username candidate if possible.
            display = spans.find((t) => t.toLowerCase() !== username) || "";
          }

          if (!display && alt) {
            // Common patterns: "Full Name's profile picture" or "Profile picture"
            const cleaned = alt
              .replace(/'s profile picture/gi, "")
              .replace(/profile picture/gi, "")
              .trim();
            if (cleaned && cleaned.toLowerCase() !== username) display = cleaned;
          }

          out.push({ username: username, display_name: display, profile_pic_url: pic });
        }

        // Scroll the modal list to load more entries.
        // IG sometimes places the actual scroll container on a nested node, and not always a div.
        // Choose the scrollable element that contains the most profile-link anchors.
        const allNodes = Array.from(dialog.querySelectorAll("*"));
        const scrollables = allNodes.filter((el) => {
          try { return (el.scrollHeight - el.clientHeight) > 180; } catch (e) { return false; }
        });
        const scoreScroller = (el) => {
          let links = 0;
          try {
            const anchors = Array.from(el.querySelectorAll("a[href^='/']"));
            for (const a of anchors) {
              const href = (a.getAttribute("href") || "").trim();
              if (/^\/[A-Za-z0-9._]{1,30}\/(?:\?.*)?$/.test(href)) links += 1;
            }
          } catch (e) {}
          let sh = 0;
          try { sh = el.scrollHeight || 0; } catch (e) {}
          return { links: links, sh: sh };
        };
        let scroller = null;
        let best = { links: -1, sh: -1 };
        for (const el of scrollables) {
          const s = scoreScroller(el);
          // Prefer the largest scrollHeight; it tends to represent the "true" list scroller.
          if (s.sh > best.sh || (s.sh === best.sh && s.links > best.links)) {
            best = s;
            scroller = el;
          }
        }
        scroller = scroller || dialog;
        let before = 0;
        try { before = scroller.scrollTop || 0; } catch (e) {}
        try { scroller.scrollTop = before + scroller.clientHeight * 0.95; } catch (e) {}
        // If the computed scroller doesn't move, try a scrollBy fallback.
        try {
          if ((scroller.scrollTop || 0) === before) scroller.scrollBy(0, Math.max(120, scroller.clientHeight || 0));
        } catch (e) {}

        let after = before;
        let sh = 0;
        let ch = 0;
        try { after = scroller.scrollTop || after; } catch (e) {}
        try { sh = scroller.scrollHeight || 0; } catch (e) {}
        try { ch = scroller.clientHeight || 0; } catch (e) {}
        const at_end = (ch > 0) ? ((after + ch) >= (sh - 4)) : false;
        const did_scroll = after !== before;

        const loading = !!dialog.querySelector("[role='progressbar'], svg[aria-label='Loading...'], div[data-visualcompletion='loading-state']");

        return {
          out: out,
          scrolled: true,
          dialog_found: true,
          scroll_top: after,
          scroll_height: sh,
          client_height: ch,
          at_end: at_end,
          did_scroll: did_scroll,
          scroller_score: best,
          scrollers_seen: scrollables.length,
          loading: loading
        };
      JS

      unless payload.is_a?(Hash) && (payload["dialog_found"] == true || payload[:dialog_found] == true)
        # If the modal was replaced/closed due to navigation, stop early.
        break
      end

      # Driver JS results may come back with string or symbol keys depending
      # on the bridge; read both defensively.
      batch = payload["out"] || payload[:out] || []
      at_end = payload["at_end"] == true || payload[:at_end] == true
      did_scroll = payload["did_scroll"] == true || payload[:did_scroll] == true
      loading = payload["loading"] == true || payload[:loading] == true
      scroll_top = payload["scroll_top"] || payload[:scroll_top]
      scroller_score = payload["scroller_score"] || payload[:scroller_score]
      scrollers_seen = payload["scrollers_seen"] || payload[:scrollers_seen]

      # Merge this round's harvest; first sighting of a username wins.
      Array(batch).each do |entry|
        u = normalize_username(entry["username"] || entry[:username])
        next if u.blank?

        users[u] ||= {
          display_name: (entry["display_name"] || entry[:display_name]).presence,
          profile_pic_url: (entry["profile_pic_url"] || entry[:profile_pic_url]).presence
        }
      end

      if users.length == last_count
        stable_rounds += 1
      else
        stable_rounds = 0
        last_count = users.length
      end

      # Track whether the scroll position is actually advancing (allow 1px jitter).
      if scroll_top
        if last_scroll_top && scroll_top.to_f <= (last_scroll_top.to_f + 1.0)
          stuck_rounds += 1
        else
          stuck_rounds = 0
        end
        last_scroll_top = scroll_top
      end

      meta[:scroll_top] = scroll_top
      meta[:scroll_stuck_rounds] = stuck_rounds
      meta[:stable_rounds] = stable_rounds
      meta[:at_end] = at_end
      meta[:did_scroll] = did_scroll
      meta[:loading] = loading
      meta[:scroller_score] = scroller_score if scroller_score
      meta[:scrollers_seen] = scrollers_seen if scrollers_seen

      # Stop as soon as we've collected at least the advertised list size.
      expected_count = meta[:expected_count].to_i
      if expected_count.positive? && users.length >= expected_count
        break
      end

      # If the modal is still loading and we haven't found anyone yet, keep waiting instead of
      # tripping the stable_rounds safety breaker.
      if users.empty? && loading
        stable_rounds = 0
        sleep(0.75)
        next
      end

      # If we never actually scroll, IG likely swapped/locked the scroll container.
      # Reset stable counter to allow more time and let subsequent iterations re-select the scroller.
      unless did_scroll
        stable_rounds = 0 if stable_rounds < 4
      end

      # Break only once we hit the end of the scroll region and nothing new has loaded for a bit.
      far_from_expected =
        expected_count.positive? && users.length < (expected_count * 0.98).floor

      break if at_end && stable_rounds >= 3 && !far_from_expected

      break if (stuck_rounds >= 25) && !far_from_expected
      break if (stable_rounds >= 60) && !far_from_expected

      # Back off progressively the longer the list stays unchanged.
      sleep(
        if loading
          0.8
        elsif stable_rounds >= 10
          1.15
        elsif stable_rounds >= 3
          0.8
        else
          0.4
        end
      )
    end

    meta[:unique_usernames] = users.length

    # Best-effort: close the modal so the next task starts from a clean page.
    begin
      driver.action.send_keys(:escape).perform
    rescue StandardError
      nil
    end

    users
  end
end
-
-
# Persists one collected follow list onto the account's InstagramProfile rows.
#
# users_hash       - {username => {display_name:, profile_pic_url:}}.
# following_flag:  - when true, mark each row as followed by the account.
# follows_you_flag: - when true, mark each row as following the account.
#
# Existing display names / avatar URLs are kept unless the scrape produced a
# non-blank replacement; every touched row gets a fresh last_synced_at.
def upsert_follow_list!(users_hash, following_flag:, follows_you_flag:)
  synced_at = Time.current

  users_hash.each do |username, attrs|
    record = @account.instagram_profiles.find_or_initialize_by(username: username)

    # Only overwrite fields when the scrape yielded a non-blank value.
    if (pic = attrs[:profile_pic_url].presence)
      record.profile_pic_url = pic
    end

    if (display = attrs[:display_name].presence)
      record.display_name = display
    end

    record.following = true if following_flag
    record.follows_you = true if follows_you_flag
    record.last_synced_at = synced_at
    record.save!
  end
end
-
-
# Collects a complete followers/following list through the private web API,
# paging with next_max_id (200 users per page, 25-page safety cap).
#
# Returns {username => {display_name:, profile_pic_url:}}; any failure
# (lookup, network, parsing) degrades to an empty hash so callers can fall
# back to the browser scrape.
def fetch_follow_list_via_api(profile_username:, list_kind:)
  username = normalize_username(profile_username)
  return {} if username.blank?

  info = fetch_web_profile_info(username)
  profile = info.is_a?(Hash) ? info.dig("data", "user") : nil
  ig_id = profile.is_a?(Hash) ? profile["id"].to_s.strip : ""
  return {} if ig_id.blank?

  segment = (list_kind.to_sym == :followers) ? "followers" : "following"
  collected = {}
  cursor = nil

  # Hard cap on pages guards against a server that never stops paginating.
  25.times do
    params = [ "count=200" ]
    params << "max_id=#{CGI.escape(cursor)}" if cursor.present?

    response = ig_api_get_json(
      path: "/api/v1/friendships/#{ig_id}/#{segment}/?#{params.join('&')}",
      referer: "#{INSTAGRAM_BASE_URL}/#{username}/"
    )
    break unless response.is_a?(Hash)

    Array(response["users"]).each do |row|
      next unless row.is_a?(Hash)

      handle = normalize_username(row["username"])
      next if handle.blank?

      collected[handle] ||= {
        display_name: row["full_name"].to_s.strip.presence || handle,
        profile_pic_url: CGI.unescapeHTML(row["profile_pic_url"].to_s).strip.presence
      }
    end

    cursor = response["next_max_id"].to_s.strip.presence
    break if cursor.blank?
  end

  collected
rescue StandardError
  {}
end
-
-
# Finds "mutual friends": followers of +profile_username+ that the client's
# account also follows.
#
# Pages through the followers API (25-page safety cap) and keeps entries the
# viewer follows, preferring the friendship_status flag from the API response
# and falling back to a lazily-built local set of followed usernames.
#
# Returns an Array of {username:, display_name:, profile_pic_url:} hashes,
# at most +limit+ (clamped to 1..100) entries; any failure degrades to [].
def fetch_mutual_friends_via_api(profile_username:, limit:)
  uname = normalize_username(profile_username)
  return [] if uname.blank?

  web_info = fetch_web_profile_info(uname)
  user = web_info.is_a?(Hash) ? web_info.dig("data", "user") : nil
  user_id = user.is_a?(Hash) ? user["id"].to_s.strip : ""
  return [] if user_id.blank?

  max_results = limit.to_i.clamp(1, 100)
  max_id = nil
  safety = 0
  mutuals = []
  seen_usernames = Set.new
  # Built on first use only, and only when friendship_status is absent from
  # an API entry; caches the usernames this account follows.
  following_usernames_cache = nil

  loop do
    break if mutuals.length >= max_results
    safety += 1
    break if safety > 25

    query = [ "count=200", "search_surface=follow_list_page", "query=", "enable_groups=true" ]
    query << "max_id=#{CGI.escape(max_id)}" if max_id.present?

    path = "/api/v1/friendships/#{user_id}/followers/?#{query.join('&')}"
    body = ig_api_get_json(path: path, referer: "#{INSTAGRAM_BASE_URL}/#{uname}/")
    break unless body.is_a?(Hash)

    users = Array(body["users"]).select { |entry| entry.is_a?(Hash) }
    break if users.empty?

    users.each do |entry|
      username = normalize_username(entry["username"])
      next if username.blank? || seen_usernames.include?(username)

      # Does the viewer follow this follower? Prefer the API's own flag.
      friendship_status = entry["friendship_status"].is_a?(Hash) ? entry["friendship_status"] : {}
      follows_from_status =
        if friendship_status.key?("following")
          ActiveModel::Type::Boolean.new.cast(friendship_status["following"])
        end

      viewer_follows =
        if follows_from_status.nil?
          # API didn't say; consult the locally-synced follow graph instead.
          following_usernames_cache ||= @account.instagram_profiles.where(following: true).pluck(:username).map { |u| normalize_username(u) }.to_set
          following_usernames_cache.include?(username)
        else
          follows_from_status
        end

      next unless viewer_follows

      seen_usernames << username
      mutuals << {
        username: username,
        display_name: entry["full_name"].to_s.strip.presence || username,
        profile_pic_url: CGI.unescapeHTML(entry["profile_pic_url"].to_s).strip.presence
      }
      break if mutuals.length >= max_results
    end

    max_id = body["next_max_id"].to_s.strip.presence
    break if max_id.blank?
  end

  mutuals
rescue StandardError
  []
end
-
end
-
end
-
end
-
module Instagram
-
class Client
-
# Builds the dataset used for profile analysis: profile details plus a
# normalized list of recent posts (optionally enriched with comments pulled
# via the browser). Every collaborator is injected as a callable, keeping
# the service decoupled from the Instagram client internals.
class ProfileAnalysisDatasetService
  def initialize(
    fetch_profile_details:,
    fetch_web_profile_info:,
    fetch_profile_feed_items_for_analysis:,
    extract_post_for_analysis:,
    enrich_missing_post_comments_via_browser:,
    normalize_username:
  )
    @fetch_profile_details = fetch_profile_details
    @fetch_web_profile_info = fetch_web_profile_info
    @fetch_profile_feed_items_for_analysis = fetch_profile_feed_items_for_analysis
    @extract_post_for_analysis = extract_post_for_analysis
    @enrich_missing_post_comments_via_browser = enrich_missing_post_comments_via_browser
    @normalize_username = normalize_username
  end

  # Returns { profile:, posts:, fetched_at:, feed_fetch: } where feed_fetch
  # carries the feed fetcher's diagnostics minus the raw items.
  # Raises when the username normalizes to blank.
  def call(username:, posts_limit: nil, comments_limit: 8)
    handle = normalize_username.call(username)
    raise "Username cannot be blank" if handle.blank?

    details = fetch_profile_details.call(username: handle)

    feed_result = fetch_profile_feed_items_for_analysis.call(
      username: handle,
      user_id: resolve_user_id(handle, details),
      posts_limit: posts_limit
    )

    # Extraction may return nil for items it can't normalize; drop those.
    posts = Array(feed_result[:items]).filter_map do |raw_item|
      extract_post_for_analysis.call(raw_item, comments_limit: comments_limit, referer_username: handle)
    end

    enrich_missing_post_comments_via_browser.call(
      username: handle,
      posts: posts,
      comments_limit: comments_limit
    )

    {
      profile: details,
      posts: posts,
      fetched_at: Time.current,
      feed_fetch: feed_result.except(:items)
    }
  end

  private

  attr_reader :fetch_profile_details,
              :fetch_web_profile_info,
              :fetch_profile_feed_items_for_analysis,
              :extract_post_for_analysis,
              :enrich_missing_post_comments_via_browser,
              :normalize_username

  # Prefer the id from web_profile_info; fall back to the one embedded in
  # the fetched profile details (when those are a Hash).
  def resolve_user_id(handle, details)
    info = fetch_web_profile_info.call(handle)
    user = info.is_a?(Hash) ? info.dig("data", "user") : nil
    from_web = user.is_a?(Hash) ? user["id"].to_s.strip.presence : nil
    return from_web if from_web

    details.is_a?(Hash) ? details[:ig_user_id].to_s.strip.presence : nil
  end
end
-
end
-
end
-
module Instagram
-
class Client
-
module ProfileFetchingService
-
# Public entry point for profile detail fetching: runs the driver-based
# fetch inside a recoverable session so a dropped browser is retried once.
def fetch_profile_details!(username:)
  with_recoverable_session(label: "fetch_profile_details") do
    with_authenticated_driver do |driver|
      fetch_profile_details_from_driver(driver, username: username)
    end
  end
end
-
-
# Fetches profile details and answers "can we DM this user?".
# The cheap API probe is consulted first; only when it is inconclusive
# (can_message is nil) do we pay for the browser-based check. The
# eligibility fields are merged over the fetched details.
def fetch_profile_details_and_verify_messageability!(username:)
  with_recoverable_session(label: "fetch_profile_details_and_verify_messageability") do
    with_authenticated_driver do |driver|
      details = fetch_profile_details_from_driver(driver, username: username)

      eligibility = verify_messageability_from_api(username: username)
      eligibility = verify_messageability_from_driver(driver, username: username) if eligibility[:can_message].nil?

      details.merge(eligibility)
    end
  end
end
-
-
# Determines whether +username+ can be messaged from this account.
#
# Prefers the API probe; when that is inconclusive, loads the profile page
# in the browser and inspects the call-to-action buttons ("Message" vs
# "Follow"/"Requested") and page text heuristically.
#
# Returns a Hash with at least :can_message and :restriction_reason; the
# API path additionally includes :source and dm_* bookkeeping fields.
def fetch_eligibility(driver, username)
  with_task_capture(driver: driver, task_name: "sync_fetch_eligibility", meta: { username: username }) do
    # Fast path: a definitive (non-nil) answer from the API wins outright.
    api_result = verify_messageability_from_api(username: username)
    if api_result.is_a?(Hash) && !api_result[:can_message].nil?
      return {
        can_message: api_result[:can_message],
        restriction_reason: api_result[:restriction_reason],
        source: "api",
        dm_state: api_result[:dm_state],
        dm_reason: api_result[:dm_reason],
        dm_retry_after_at: api_result[:dm_retry_after_at]
      }
    end

    driver.navigate.to("#{INSTAGRAM_BASE_URL}/#{username}/")
    wait_for(driver, css: "body", timeout: 8)

    page = driver.page_source.to_s
    page_down = page.downcase

    # If we hit a generic error page or an interstitial, eligibility is unknown.
    if page_down.include?("something went wrong") ||
       page_down.include?("unexpected error") ||
       page_down.include?("polarishttp500") ||
       page_down.include?("try again later")
      return { can_message: false, restriction_reason: "Unable to verify messaging availability (profile load error)" }
    end

    # "Message" often renders as <div role="button"> on modern IG builds (not only <button>).
    message_cta =
      driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][normalize-space()='Message']").first ||
      driver.find_elements(xpath: "//*[self::a and @role='link' and normalize-space()='Message']").first

    # A Follow/Requested button without a Message button implies no DM access.
    follow_cta =
      driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][normalize-space()='Follow']").first ||
      driver.find_elements(xpath: "//*[self::button or (self::div and @role='button')][normalize-space()='Requested']").first

    if message_cta
      { can_message: true, restriction_reason: nil }
    elsif follow_cta
      { can_message: false, restriction_reason: "User is not currently messageable from this account" }
    elsif page_down.include?("private")
      { can_message: false, restriction_reason: "Private or restricted profile" }
    else
      { can_message: false, restriction_reason: "Unable to verify messaging availability" }
    end
  end
end
-
-
# Fetches the Instagram web app's profile-info JSON for +username+ using the
# account's stored cookies and a browser-like header set. Returns the parsed
# Hash, or nil on any failure (non-2xx response, network error, bad JSON).
def fetch_web_profile_info(username)
  # Unofficial endpoint used by the Instagram web app; requires authenticated cookies.
  endpoint = URI.parse("#{INSTAGRAM_BASE_URL}/api/v1/users/web_profile_info/?username=#{username}")

  client = Net::HTTP.new(endpoint.host, endpoint.port)
  client.use_ssl = (endpoint.scheme == "https")
  client.open_timeout = 10
  client.read_timeout = 20

  request = Net::HTTP::Get.new(endpoint.request_uri)
  request["User-Agent"] = @account.user_agent.presence || "Mozilla/5.0"
  request["Accept"] = "application/json, text/plain, */*"
  request["X-Requested-With"] = "XMLHttpRequest"
  # Fall back to the public web app id when the account snapshot lacks one.
  request["X-IG-App-ID"] = (@account.auth_snapshot.dig("ig_app_id").presence || "936619743392459")
  request["Referer"] = "#{INSTAGRAM_BASE_URL}/#{username}/"
  request["Cookie"] = cookie_header_for(@account.cookies)

  csrf_token = @account.cookies.find { |c| c["name"].to_s == "csrftoken" }&.dig("value").to_s
  request["X-CSRFToken"] = csrf_token if csrf_token.present?

  response = client.request(request)
  return nil unless response.is_a?(Net::HTTPSuccess)

  JSON.parse(response.body.to_s)
rescue StandardError
  nil
end
-
-
# Builds a profile-details Hash for +username+ using an already-open driver.
#
# Tries the API-only path first; otherwise scrapes the rendered profile page
# (og: meta tags for name/avatar), supplements with web_profile_info fields,
# and resolves the latest post through a DOM -> HTML -> HTTP fallback chain.
#
# Returns {username:, display_name:, profile_pic_url:, ig_user_id:, bio:,
#          followers_count:, category_name:, is_business_account:,
#          last_post_at:, latest_post_shortcode:}.
# Raises when the username normalizes to blank.
def fetch_profile_details_from_driver(driver, username:)
  username = normalize_username(username)
  raise "Username cannot be blank" if username.blank?

  with_task_capture(driver: driver, task_name: "profile_fetch_details", meta: { username: username }) do
    # Fast path: the API variant avoids page navigation entirely.
    api_details = fetch_profile_details_via_api(username)
    return api_details if api_details.present?

    driver.navigate.to("#{INSTAGRAM_BASE_URL}/#{username}/")
    wait_for(driver, css: "body", timeout: 10)
    dismiss_common_overlays!(driver)

    html = driver.page_source.to_s

    # Display name from the og:title meta tag.
    display_name = nil
    if (og = html.match(/property=\"og:title\" content=\"([^\"]+)\"/))
      og_title = CGI.unescapeHTML(og[1].to_s)
      # Examples: "Name (@username) • Instagram photos and videos"
      if (m = og_title.match(/\A(.+?)\s*\(@#{Regexp.escape(username)}\)\b/))
        display_name = m[1].to_s.strip
      end
    end

    # Avatar URL from the og:image meta tag.
    pic = nil
    if (img = html.match(/property=\"og:image\" content=\"([^\"]+)\"/))
      pic = CGI.unescapeHTML(img[1].to_s).strip
    end

    # Supplement scraped fields from the web profile-info endpoint.
    web_info = fetch_web_profile_info(username)
    web_user = web_info.is_a?(Hash) ? web_info.dig("data", "user") : nil
    ig_user_id = web_user.is_a?(Hash) ? web_user["id"].to_s.strip.presence : nil
    bio = web_user.is_a?(Hash) ? web_user["biography"].to_s.presence : nil
    full_name = web_user.is_a?(Hash) ? web_user["full_name"].to_s.strip.presence : nil
    followers_count = web_user.is_a?(Hash) ? normalize_count(web_user["follower_count"]) : nil
    # Fall back to the counts visible in the page HTML when the API lacks them.
    followers_count ||= extract_profile_follow_counts(html)&.dig(:followers)
    category_name = web_user.is_a?(Hash) ? web_user["category_name"].to_s.strip.presence : nil
    is_business_account = web_user.is_a?(Hash) ? ActiveModel::Type::Boolean.new.cast(web_user["is_business_account"]) : nil

    display_name ||= full_name

    # Latest post: DOM first, then raw HTML, then a direct HTTP fetch.
    post = extract_latest_post_from_profile_dom(driver)
    post = extract_latest_post_from_profile_html(html) if post[:taken_at].blank? && post[:shortcode].blank?
    post = extract_latest_post_from_profile_http(username) if post[:taken_at].blank? && post[:shortcode].blank?

    {
      username: username,
      display_name: display_name,
      profile_pic_url: pic,
      ig_user_id: ig_user_id,
      bio: bio,
      followers_count: followers_count,
      category_name: category_name,
      is_business_account: is_business_account,
      last_post_at: post[:taken_at],
      latest_post_shortcode: post[:shortcode]
    }
  end
end
-
-
# API-only variant of the profile detail fetch: pulls everything from the
# web_profile_info endpoint plus the latest post via HTTP. Returns nil on
# any failure so callers can fall back to the browser-scraping path.
def fetch_profile_details_via_api(username)
  handle = normalize_username(username)
  return nil if handle.blank?

  payload = fetch_web_profile_info(handle)
  user = payload.is_a?(Hash) ? payload.dig("data", "user") : nil
  return nil unless user.is_a?(Hash)

  latest_post = extract_latest_post_from_profile_http(handle)

  # Prefer the HD avatar when present.
  avatar =
    CGI.unescapeHTML(user["profile_pic_url_hd"].to_s).strip.presence ||
    CGI.unescapeHTML(user["profile_pic_url"].to_s).strip.presence

  {
    username: handle,
    display_name: user["full_name"].to_s.strip.presence,
    profile_pic_url: avatar,
    ig_user_id: user["id"].to_s.strip.presence,
    bio: user["biography"].to_s.presence,
    followers_count: normalize_count(user["follower_count"]),
    category_name: user["category_name"].to_s.strip.presence,
    is_business_account: ActiveModel::Type::Boolean.new.cast(user["is_business_account"]),
    last_post_at: latest_post[:taken_at],
    latest_post_shortcode: latest_post[:shortcode]
  }
rescue StandardError
  nil
end
-
end
-
end
-
end
-
module Instagram
-
class Client
-
# Assembles the story dataset for a profile: profile details plus up to 30
# normalized story items from the user's current reel. Collaborators are
# injected as callables so the service stays independent of the client.
class ProfileStoryDatasetService
  def initialize(
    fetch_profile_details:,
    fetch_web_profile_info:,
    fetch_story_reel:,
    extract_story_item:,
    normalize_username:
  )
    @fetch_profile_details = fetch_profile_details
    @fetch_web_profile_info = fetch_web_profile_info
    @fetch_story_reel = fetch_story_reel
    @extract_story_item = extract_story_item
    @normalize_username = normalize_username
  end

  # Returns { profile:, user_id:, stories:, fetched_at: }.
  # Raises when the username normalizes to blank.
  def call(username:, stories_limit: 20)
    handle = normalize_username.call(username)
    raise "Username cannot be blank" if handle.blank?

    details = fetch_profile_details.call(username: handle)
    owner_id = lookup_user_id(handle)

    # Cap the number of raw items first, then drop any the extractor rejects.
    stories =
      raw_story_items(handle, owner_id)
        .first(stories_limit.to_i.clamp(1, 30))
        .filter_map { |raw| extract_story_item.call(raw, username: handle, reel_owner_id: owner_id) }

    {
      profile: details,
      user_id: owner_id.presence,
      stories: stories,
      fetched_at: Time.current
    }
  end

  private

  attr_reader :fetch_profile_details, :fetch_web_profile_info, :fetch_story_reel, :extract_story_item, :normalize_username

  # Resolves the numeric IG user id; "" when the lookup fails.
  def lookup_user_id(handle)
    info = fetch_web_profile_info.call(handle)
    user = info.is_a?(Hash) ? info.dig("data", "user") : nil
    user.is_a?(Hash) ? user["id"].to_s.strip : ""
  end

  # Fetches the story reel items; [] without an owner id or a usable reel.
  def raw_story_items(handle, owner_id)
    return [] if owner_id.blank?

    reel = fetch_story_reel.call(user_id: owner_id, referer_username: handle)
    reel.is_a?(Hash) ? Array(reel["items"]) : []
  end
end
-
end
-
end
-
module Instagram
-
class Client
-
# Mixin that retries work whose failure looks like a dead/disconnected
# browser session; all other errors propagate immediately.
module SessionRecoverySupport
  private

  # Runs the block, retrying (up to max_attempts total tries) only when the
  # raised error is recognized as a browser-disconnect. Returns the block's
  # value; re-raises anything else, or the disconnect once attempts run out.
  def with_recoverable_session(label:, max_attempts: 2)
    attempts_used = 0

    begin
      attempts_used += 1
      yield
    rescue StandardError => error
      raise unless disconnected_session_error?(error)
      raise if attempts_used >= max_attempts

      Rails.logger.warn("Instagram #{label} recovered from browser disconnect (attempt #{attempts_used}/#{max_attempts}).")
      sleep(1)
      retry
    end
  end

  # Heuristic disconnect detection: either the dedicated Selenium error
  # class, or a message that mentions a dropped browser connection.
  def disconnected_session_error?(error)
    return true if error.is_a?(Selenium::WebDriver::Error::InvalidSessionIdError)

    text = error.message.to_s.downcase
    [
      "not connected to devtools",
      "session deleted as the browser has closed the connection",
      "disconnected"
    ].any? { |needle| text.include?(needle) }
  end
end
-
end
-
end
-
module Instagram
-
class Client
-
# Validates that an account's stored cookie session is still authenticated.
#
# Heuristic: load the home page, bail on a login redirect, then count how
# many known "logged-in UI" selectors are visible; if enough are found,
# additionally confirm the account's own profile page is reachable.
# Always returns a {valid:, message:, details?:} Hash — never raises.
class SessionValidationService
  # CSS selectors that typically only render for an authenticated session.
  # The trailing class-name selectors are brittle (obfuscated class names);
  # they merely add signal alongside the semantic ones.
  AUTHENTICATED_SELECTORS = [
    "svg[aria-label='Home']",
    "svg[aria-label='Search']",
    "img[alt*='profile picture']",
    "a[href*='/direct/inbox/']",
    "[aria-label='Settings']",
    ".x9f619",
    ".x78zum5",
    ".x1i10hfl"
  ].freeze

  # Selectors expected on the account's own profile page.
  PROFILE_INDICATORS = [
    "img[alt*='profile picture']",
    "h2",
    "a[href*='/followers/']",
    "a[href*='/following/']"
  ].freeze

  # Minimum visible AUTHENTICATED_SELECTORS needed to call the session valid.
  MIN_REQUIRED_INDICATORS = 3

  # account:     - record exposing #cookies and #username.
  # with_driver: - callable yielding a (headless) Selenium driver.
  # wait_for:    - callable(driver, css:, timeout:) blocking until a match.
  def initialize(account:, with_driver:, wait_for:, logger: nil, base_url: Client::INSTAGRAM_BASE_URL)
    @account = account
    @with_driver = with_driver
    @wait_for = wait_for
    @logger = logger
    @base_url = base_url
  end

  # Runs the validation; returns {valid:, message:} plus optional :details.
  def call
    return { valid: false, message: "No cookies stored" } if account.cookies.empty?

    with_driver.call(headless: true) do |driver|
      driver.navigate.to(base_url)
      wait_for.call(driver, css: "body", timeout: 12)

      if login_redirect?(driver.current_url)
        return { valid: false, message: "Session expired - redirected to login page" }
      end

      begin
        authenticated_found, found_selectors = count_visible_indicators(driver, AUTHENTICATED_SELECTORS)
        if authenticated_found >= MIN_REQUIRED_INDICATORS
          return validate_profile_access(driver: driver, authenticated_found: authenticated_found, found_selectors: found_selectors)
        end

        {
          valid: false,
          message: "Session appears to be invalid - only found #{authenticated_found}/#{AUTHENTICATED_SELECTORS.length} authentication indicators",
          details: {
            homepage_indicators: authenticated_found,
            required_indicators: MIN_REQUIRED_INDICATORS,
            found_selectors: found_selectors
          }
        }
      rescue StandardError => e
        { valid: false, message: "Session validation error: #{e.message}" }
      end
    end
  rescue StandardError => e
    # Catch-all (driver launch/navigation failures) so callers get a Hash.
    { valid: false, message: "Validation failed: #{e.message}" }
  end

  private

  attr_reader :account, :with_driver, :wait_for, :logger, :base_url

  # Second stage: the homepage looked authenticated; confirm the account's
  # own profile page also loads without a login redirect.
  def validate_profile_access(driver:, authenticated_found:, found_selectors:)
    driver.navigate.to("#{base_url}/#{account.username}/")
    wait_for.call(driver, css: "body", timeout: 8)

    if login_redirect?(driver.current_url)
      return { valid: false, message: "Session invalid - cannot access profile page" }
    end

    # Count profile indicators for diagnostics only; they don't gate validity.
    profile_elements_found = PROFILE_INDICATORS.sum do |selector|
      begin
        visible_element_count(driver: driver, selector: selector).positive? ? 1 : 0
      rescue StandardError
        0
      end
    end

    {
      valid: true,
      message: "Session is valid and authenticated (found #{authenticated_found}/#{AUTHENTICATED_SELECTORS.length} indicators, #{profile_elements_found} profile elements)",
      details: {
        homepage_indicators: authenticated_found,
        profile_indicators: profile_elements_found,
        found_selectors: found_selectors
      }
    }
  end

  # Returns [count, annotated_selectors]: how many of +selectors+ have at
  # least one visible match, and which ones (with their match counts).
  def count_visible_indicators(driver, selectors)
    found_selectors = []
    count = 0

    selectors.each do |selector|
      begin
        visible_count = visible_element_count(driver: driver, selector: selector)
        next unless visible_count.positive?

        count += 1
        found_selectors << "#{selector} (#{visible_count})"
      rescue StandardError => e
        if ignorable_selector_error?(e)
          next
        end

        # Unexpected errors are logged but don't abort the scan.
        logger&.warn("Validation selector error for #{selector}: #{e.message}")
      end
    end

    [count, found_selectors]
  end

  # Number of currently-displayed elements matching the CSS selector.
  def visible_element_count(driver:, selector:)
    elements = driver.find_elements(css: selector)
    elements.select(&:displayed?).length
  end

  # Selector errors that are normal churn on a dynamic page.
  # The rescue guards against Selenium constants not being loaded.
  def ignorable_selector_error?(error)
    error.is_a?(Selenium::WebDriver::Error::NoSuchElementError) ||
      error.is_a?(Selenium::WebDriver::Error::StaleElementReferenceError)
  rescue NameError
    false
  end

  # True when the browser landed on a login/signup route.
  def login_redirect?(url)
    value = url.to_s
    value.include?("/accounts/login/") || value.include?("/accounts/emailsignup/")
  end
end
-
end
-
end
-
module Instagram
-
class Client
-
# Sends a single DM: checks the per-profile retry window, attempts the API
# send first, and falls back to driving the browser UI when the API path
# does not deliver. All collaborators are injected as callables.
class SingleMessageSendService
  def initialize(
    with_recoverable_session:,
    with_authenticated_driver:,
    with_task_capture:,
    find_profile_for_interaction:,
    dm_interaction_retry_pending:,
    send_direct_message_via_api:,
    mark_profile_dm_state:,
    apply_dm_state_from_send_result:,
    open_dm:,
    send_text_message_from_driver:
  )
    @with_recoverable_session = with_recoverable_session
    @with_authenticated_driver = with_authenticated_driver
    @with_task_capture = with_task_capture
    @find_profile_for_interaction = find_profile_for_interaction
    @dm_interaction_retry_pending = dm_interaction_retry_pending
    @send_direct_message_via_api = send_direct_message_via_api
    @mark_profile_dm_state = mark_profile_dm_state
    @apply_dm_state_from_send_result = apply_dm_state_from_send_result
    @open_dm = open_dm
    @send_text_message_from_driver = send_text_message_from_driver
  end

  # Sends +message_text+ to +username+. Returns true on success; raises when
  # a retry window is pending, the text/username is blank (browser path), or
  # the DM thread cannot be opened.
  def call(username:, message_text:)
    with_recoverable_session.call(label: "send_message") do
      profile = find_profile_for_interaction.call(username: username)
      guard_retry_window!(profile, username)

      api_result = send_direct_message_via_api.call(username: username, message_text: message_text)
      if api_result[:sent]
        mark_messageable(profile, "api_text_sent")
        return true
      end

      # Record whatever the API told us before attempting the UI fallback.
      apply_dm_state_from_send_result.call(profile: profile, result: api_result)
      deliver_via_browser(profile, username, message_text, api_result)
    end
  end

  private

  # Raises when the profile is still inside its DM retry back-off window.
  def guard_retry_window!(profile, username)
    return unless dm_interaction_retry_pending.call(profile)

    stamp = profile&.dm_interaction_retry_after_at&.utc&.iso8601
    raise "DM retry pending for #{username}#{stamp.present? ? " until #{stamp}" : ""}"
  end

  # Flags the profile as messageable after a successful delivery.
  def mark_messageable(profile, reason)
    mark_profile_dm_state.call(
      profile: profile,
      state: "messageable",
      reason: reason,
      retry_after_at: nil
    )
  end

  # UI fallback: open the DM thread in the browser and type the message.
  def deliver_via_browser(profile, username, message_text, api_result)
    with_authenticated_driver.call do |driver|
      raise "Message cannot be blank" if message_text.to_s.strip.blank?
      raise "Username cannot be blank" if username.to_s.strip.blank?

      opened =
        with_task_capture.call(driver: driver, task_name: "dm_open", meta: { username: username }) do
          open_dm.call(driver, username)
        end
      raise "Unable to open DM for #{username}" unless opened

      with_task_capture.call(
        driver: driver,
        task_name: "dm_send_text",
        meta: {
          username: username,
          message_preview: message_text.to_s.strip.byteslice(0, 80),
          api_fallback_reason: api_result[:reason].to_s
        }
      ) do
        send_text_message_from_driver.call(driver, message_text.to_s, expected_username: username)
      end

      mark_messageable(profile, "ui_fallback_sent")
      sleep(0.6)
      true
    end
  end

  attr_reader :with_recoverable_session,
              :with_authenticated_driver,
              :with_task_capture,
              :find_profile_for_interaction,
              :dm_interaction_retry_pending,
              :send_direct_message_via_api,
              :mark_profile_dm_state,
              :apply_dm_state_from_send_result,
              :open_dm,
              :send_text_message_from_driver
end
-
end
-
end
-
module Instagram
-
class Client
-
module StoryApiSupport
-
private
-
-
# Performs an authenticated GET against Instagram's web API using the
# account's saved cookies and user agent, returning the parsed JSON.
#
# path    - absolute URL, or a path appended to INSTAGRAM_BASE_URL.
# referer - value for the Referer header (many IG endpoints validate it).
#
# Returns the parsed JSON (Hash/Array) on success; nil on any HTTP,
# network, or parse failure (errors are swallowed by design).
def ig_api_get_json(path:, referer:)
  uri = URI.parse(path.to_s.start_with?("http") ? path.to_s : "#{INSTAGRAM_BASE_URL}#{path}")

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 10
  http.read_timeout = 20

  req = Net::HTTP::Get.new(uri.request_uri)
  req["User-Agent"] = @account.user_agent.presence || "Mozilla/5.0"
  req["Accept"] = "application/json, text/plain, */*"
  req["X-Requested-With"] = "XMLHttpRequest"
  # Fallback id appears to be the public Instagram web app id — confirm if IG rotates it.
  req["X-IG-App-ID"] = (@account.auth_snapshot.dig("ig_app_id").presence || "936619743392459")
  req["Referer"] = referer.to_s

  # CSRF token is mirrored from the stored csrftoken cookie, as the web client does.
  csrf = @account.cookies.find { |c| c["name"].to_s == "csrftoken" }&.dig("value").to_s
  req["X-CSRFToken"] = csrf if csrf.present?
  req["Cookie"] = cookie_header_for(@account.cookies)

  res = http.request(req)
  return nil unless res.is_a?(Net::HTTPSuccess)
  # A login wall can answer 200 with HTML — require a JSON content type.
  return nil unless res["content-type"].to_s.include?("json")

  JSON.parse(res.body.to_s)
rescue StandardError
  nil
end
-
-
# Fetches the story reel (set of currently-active story items) for
# +user_id+ via the private reels_media endpoint, authenticated with the
# account's cookies.
#
# Handles both known response shapes: a "reels" Hash keyed by user id,
# and a "reels_media" Array. When the requested reel cannot be matched by
# key or embedded owner id, a lone reel is assumed to be ours (with a
# structured warning); otherwise a warning is logged and nil returned.
#
# Returns the reel Hash, the raw parsed body when neither shape is
# present, or nil on failure.
def fetch_story_reel(user_id:, referer_username:)
  uri = URI.parse("#{INSTAGRAM_BASE_URL}/api/v1/feed/reels_media/?reel_ids=#{CGI.escape(user_id.to_s)}")

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 10
  http.read_timeout = 20

  req = Net::HTTP::Get.new(uri.request_uri)
  req["User-Agent"] = @account.user_agent.presence || "Mozilla/5.0"
  req["Accept"] = "application/json, text/plain, */*"
  req["X-Requested-With"] = "XMLHttpRequest"
  # Fallback id appears to be the public Instagram web app id.
  req["X-IG-App-ID"] = (@account.auth_snapshot.dig("ig_app_id").presence || "936619743392459")
  req["Referer"] = "#{INSTAGRAM_BASE_URL}/#{referer_username}/"

  csrf = @account.cookies.find { |c| c["name"].to_s == "csrftoken" }&.dig("value").to_s
  req["X-CSRFToken"] = csrf if csrf.present?
  req["Cookie"] = cookie_header_for(@account.cookies)

  res = http.request(req)
  return nil unless res.is_a?(Net::HTTPSuccess)

  body = JSON.parse(res.body.to_s)

  # Debug: Capture raw story reel data
  debug_story_reel_data(referer_username: referer_username, user_id: user_id, body: body)

  reels = body["reels"]
  if reels.is_a?(Hash)
    # Preferred: reel stored directly under the requested user id.
    direct = reels[user_id.to_s]
    return direct if direct.is_a?(Hash)

    # Key mismatch — fall back to matching by the reel's embedded owner id.
    by_owner = reels.values.find { |entry| reel_entry_owner_id(entry) == user_id.to_s }
    return by_owner if by_owner.is_a?(Hash)

    if reels.size == 1
      # Exactly one reel under an unexpected key: assume it is ours, but log it.
      Ops::StructuredLogger.warn(
        event: "instagram.story_reel.single_reel_without_key_match",
        payload: {
          requested_user_id: user_id.to_s,
          referer_username: referer_username.to_s,
          available_reel_keys: reels.keys.first(6)
        }
      )
      return reels.values.first
    end

    # Multiple reels, none matching — refuse to guess.
    Ops::StructuredLogger.warn(
      event: "instagram.story_reel.requested_reel_missing",
      payload: {
        requested_user_id: user_id.to_s,
        referer_username: referer_username.to_s,
        available_reel_keys: reels.keys.first(10),
        reels_count: reels.size
      }
    )
    return nil
  end

  reels_media = body["reels_media"]
  if reels_media.is_a?(Array)
    by_owner = reels_media.find { |entry| reel_entry_owner_id(entry) == user_id.to_s }
    return by_owner if by_owner.is_a?(Hash)

    if reels_media.length == 1
      # Single entry without an owner match: assume it is ours, with a warning.
      Ops::StructuredLogger.warn(
        event: "instagram.story_reel.single_reel_media_without_owner_match",
        payload: {
          requested_user_id: user_id.to_s,
          referer_username: referer_username.to_s
        }
      )
      return reels_media.first
    end

    Ops::StructuredLogger.warn(
      event: "instagram.story_reel.reels_media_owner_missing",
      payload: {
        requested_user_id: user_id.to_s,
        referer_username: referer_username.to_s,
        reels_media_count: reels_media.length
      }
    )
    return nil
  end

  # Unknown shape — hand back the raw body for the caller to inspect.
  body
rescue StandardError
  nil
end
-
-
# Resolves the media payload (URL, type, dimensions, ownership) for the
# story currently in context, using API-only resolution. +driver+ is
# accepted for interface compatibility but is not used here.
#
# A story_id of "unknown" (any case) is treated as absent. When the API
# cannot resolve a playable URL, a blank payload tagged "api_unresolved"
# is returned (and a structured warning logged); on any exception the
# same blank payload is tagged "api_unresolved_error".
def resolve_story_media_for_current_context(driver:, username:, story_id:, fallback_story_key:, cache: nil)
  uname = normalize_username(username)
  sid = story_id.to_s.strip
  sid = "" if sid.casecmp("unknown").zero?

  api_story = resolve_story_item_via_api(username: uname, story_id: sid, cache: cache)
  if api_story.is_a?(Hash)
    url = api_story[:media_url].to_s
    if url.present?
      return {
        media_type: api_story[:media_type].to_s.presence || "unknown",
        url: url,
        width: api_story[:width],
        height: api_story[:height],
        source: "api_reels_media",
        story_id: api_story[:story_id].to_s,
        image_url: api_story[:image_url].to_s.presence,
        video_url: api_story[:video_url].to_s.presence,
        owner_user_id: api_story[:owner_user_id].to_s.presence,
        owner_username: api_story[:owner_username].to_s.presence,
        media_variant_count: Array(api_story[:media_variants]).length,
        primary_media_index: api_story[:primary_media_index],
        primary_media_source: api_story[:primary_media_source].to_s.presence,
        carousel_media: Array(api_story[:carousel_media])
      }
    end
  end

  Ops::StructuredLogger.warn(
    event: "instagram.story_media.api_unresolved",
    payload: {
      username: uname,
      story_id: sid.presence || fallback_story_key.to_s,
      source: "api_only_resolution"
    }
  )
  unresolved_story_media_payload(story_id: sid.presence || fallback_story_key.to_s, source: "api_unresolved")
rescue StandardError
  unresolved_story_media_payload(story_id: sid.presence || fallback_story_key.to_s, source: "api_unresolved_error")
end

# Blank media payload shared by the unresolved and error paths above
# (previously duplicated inline in both branches).
def unresolved_story_media_payload(story_id:, source:)
  {
    media_type: nil,
    url: nil,
    width: nil,
    height: nil,
    source: source,
    story_id: story_id,
    image_url: nil,
    video_url: nil,
    owner_user_id: nil,
    owner_username: nil,
    media_variant_count: 0,
    primary_media_index: nil,
    primary_media_source: nil,
    carousel_media: []
  }
end
-
-
# Looks up a single story item for +username+ through the API feed.
#
# With a story_id: returns the matching item or nil. Without one: returns
# the only item when exactly one exists — it never guesses among several.
# Any failure resolves to nil.
def resolve_story_item_via_api(username:, story_id:, cache: nil)
  handle = normalize_username(username)
  return nil if handle.blank?

  stories = fetch_story_items_via_api(username: handle, cache: cache)
  return nil unless stories.is_a?(Array) && !stories.empty?

  wanted = story_id.to_s.strip
  if wanted.present?
    stories.find { |story| story.is_a?(Hash) && story[:story_id].to_s == wanted }
  elsif stories.length == 1
    # Only pick the first item without a story_id when it is unambiguous.
    stories.first
  end
rescue StandardError
  nil
end
-
-
# Returns the normalized story items for +username+.
#
# cache - optional plain Hash shared across one sync run; results are
#         memoized under "stories:<username>" with the resolved user_id
#         and a fetch timestamp, so repeat lookups skip the network.
#
# Resolution: username -> user id via web_profile_info, then the reel is
# fetched and each raw item mapped through extract_story_item.
# Returns [] when the user cannot be resolved or on any error.
def fetch_story_items_via_api(username:, cache: nil)
  uname = normalize_username(username)
  return [] if uname.blank?

  cache_key = "stories:#{uname}"
  if cache.is_a?(Hash) && cache[cache_key].is_a?(Hash)
    cached = cache[cache_key][:items]
    return cached if cached.is_a?(Array)
  end

  web_info = fetch_web_profile_info(uname)
  user = web_info.is_a?(Hash) ? web_info.dig("data", "user") : nil
  user_id = user.is_a?(Hash) ? user["id"].to_s.strip : ""
  return [] if user_id.blank?

  reel = fetch_story_reel(user_id: user_id, referer_username: uname)
  raw_items = reel.is_a?(Hash) ? Array(reel["items"]) : []
  # extract_story_item returns nil for unusable items; filter_map drops them.
  stories = raw_items.filter_map { |item| extract_story_item(item, username: uname, reel_owner_id: user_id) }

  if cache.is_a?(Hash)
    cache[cache_key] = { user_id: user_id, items: stories, fetched_at: Time.current.utc.iso8601(3) }
  end
  stories
rescue StandardError
  []
end
-
-
# Normalizes one raw API story item into the internal story hash.
#
# item          - raw Hash from the reels_media response
# username      - reel owner's username (used for permalink + attribution)
# reel_owner_id - reel owner's user id, when known
#
# Picks a primary media variant (root media preferred), detects external
# attribution (reshares/mentions pointing at other profiles), and returns
# nil when the item has no usable story id or on any error.
def extract_story_item(item, username:, reel_owner_id: nil)
  return nil unless item.is_a?(Hash)

  # "pk" may be "<media>_<owner>"; only the media part identifies the story.
  story_id = (item["pk"] || item["id"]).to_s.split("_").first.to_s.strip
  return nil if story_id.blank?

  media_variants = extract_story_media_variants_from_item(item)
  selected_variant = choose_primary_story_media_variant(variants: media_variants)
  media_type = selected_variant[:media_type].to_s.presence || story_media_type(item["media_type"])
  image_url = selected_variant[:image_url].to_s.presence
  video_url = selected_variant[:video_url].to_s.presence
  media_url = selected_variant[:media_url].to_s.presence || video_url.presence || image_url.presence
  width = selected_variant[:width]
  height = selected_variant[:height]
  owner_id = (item.dig("owner", "id") || item.dig("owner", "pk") || item.dig("user", "id") || item.dig("user", "pk")).to_s.strip
  owner_username = normalize_username(item.dig("user", "username").to_s)
  external_story_ctx = detect_external_story_attribution_from_item(
    item: item,
    reel_owner_id: reel_owner_id.to_s.presence || owner_id,
    reel_username: username
  )

  {
    story_id: story_id,
    media_type: media_type,
    media_url: media_url.presence || image_url.presence || video_url.presence,
    image_url: image_url.presence,
    video_url: video_url.presence,
    # nil (not false) when the API omitted the flag entirely.
    can_reply: item.key?("can_reply") ? ActiveModel::Type::Boolean.new.cast(item["can_reply"]) : nil,
    can_reshare: item.key?("can_reshare") ? ActiveModel::Type::Boolean.new.cast(item["can_reshare"]) : nil,
    owner_user_id: owner_id.presence,
    owner_username: owner_username.presence,
    api_has_external_profile_indicator: external_story_ctx[:has_external_profile_indicator],
    api_external_profile_reason: external_story_ctx[:reason_code],
    api_external_profile_targets: external_story_ctx[:targets],
    # Externally-attributed stories are flagged for skipping downstream.
    api_should_skip: external_story_ctx[:has_external_profile_indicator],
    api_raw_media_type: item["media_type"].to_i,
    primary_media_source: selected_variant[:source].to_s.presence,
    primary_media_index: selected_variant[:index],
    media_variants: media_variants,
    carousel_media: media_variants.select { |entry| entry[:source].to_s == "carousel_media" },
    width: width.to_i.positive? ? width.to_i : nil,
    height: height.to_i.positive? ? height.to_i : nil,
    caption: item.dig("caption", "text").to_s.presence,
    taken_at: parse_unix_time(item["taken_at"] || item["taken_at_timestamp"]),
    expiring_at: parse_unix_time(item["expiring_at"] || item["expiring_at_timestamp"]),
    permalink: "#{INSTAGRAM_BASE_URL}/stories/#{username}/#{story_id}/"
  }
rescue StandardError
  nil
end
-
-
# Builds the list of media variants for a story item: the root media at
# index 0 followed by any carousel children (1-based indexes). Variants
# without a playable media_url are dropped. Returns [] on any error.
def extract_story_media_variants_from_item(item)
  return [] unless item.is_a?(Hash)

  candidates = [build_story_media_variant(item: item, source: "root", index: 0)]
  Array(item["carousel_media"]).each_with_index do |child, position|
    candidates << build_story_media_variant(item: child, source: "carousel_media", index: position + 1)
  end

  candidates.compact.select { |variant| variant[:media_url].to_s.present? }
rescue StandardError
  []
end
-
-
# Normalizes one media node (story root or a carousel child) into a
# variant hash with HTML-unescaped URLs and pixel dimensions. Returns
# nil for non-hash input or on any error.
def build_story_media_variant(item:, source:, index:)
  return nil unless item.is_a?(Hash)

  kind = story_media_type(item["media_type"])
  image_node = item.dig("image_versions2", "candidates", 0)
  video_node = Array(item["video_versions"]).first
  image_url = CGI.unescapeHTML(image_node&.dig("url").to_s).strip.presence
  video_url = CGI.unescapeHTML(video_node&.dig("url").to_s).strip.presence
  # Prefer the URL matching the declared media type; fall back to the other.
  primary_url =
    if kind == "video"
      video_url.presence || image_url.presence
    else
      image_url.presence || video_url.presence
    end
  raw_width = item["original_width"] || image_node&.dig("width") || video_node&.dig("width")
  raw_height = item["original_height"] || image_node&.dig("height") || video_node&.dig("height")

  {
    source: source.to_s,
    index: index.to_i,
    media_pk: (item["pk"] || item["id"]).to_s.split("_").first.to_s.presence,
    raw_media_type: item["media_type"].to_i,
    media_type: kind,
    media_url: primary_url.to_s.presence,
    image_url: image_url,
    video_url: video_url,
    width: raw_width.to_i.positive? ? raw_width.to_i : nil,
    height: raw_height.to_i.positive? ? raw_height.to_i : nil
  }
rescue StandardError
  nil
end
-
-
# Picks the variant to treat as the story's primary media: the root
# media wins, then any video, then the first playable variant. Returns
# {} when nothing is playable or on any error.
def choose_primary_story_media_variant(variants:)
  playable = Array(variants).select { |variant| variant.is_a?(Hash) && variant[:media_url].to_s.present? }
  return {} if playable.empty?

  preferred =
    playable.find { |variant| variant[:source].to_s == "root" } ||
    playable.find { |variant| variant[:media_type].to_s == "video" }
  preferred || playable.first
rescue StandardError
  {}
end
-
-
# Trims a variant list down to a metadata-safe summary: at most +limit+
# entries (clamped to 1..20), only ones with a media_url, nil fields
# removed. Accepts hashes keyed by symbols or strings. Returns [] on error.
def compact_story_media_variants_for_metadata(variants, limit: 8)
  capped = limit.to_i.clamp(1, 20)
  Array(variants).first(capped).filter_map do |entry|
    row = entry.is_a?(Hash) ? entry : {}
    fetch = ->(key) { row[key] || row[key.to_s] }
    url = fetch.call(:media_url)
    next nil if url.to_s.blank?

    {
      source: fetch.call(:source).to_s.presence,
      index: fetch.call(:index),
      media_pk: fetch.call(:media_pk).to_s.presence,
      media_type: fetch.call(:media_type).to_s.presence,
      media_url: url.to_s.presence,
      image_url: fetch.call(:image_url).to_s.presence,
      video_url: fetch.call(:video_url).to_s.presence,
      width: fetch.call(:width),
      height: fetch.call(:height)
    }.compact
  end
rescue StandardError
  []
end
-
-
# Inspects a raw story item for signals that it actually belongs to (or
# reshares content from) a profile other than the reel owner: owner id
# or username mismatch, reshared feed media, media attributions, reel
# mentions, or a reshared text post.
#
# Returns { has_external_profile_indicator:, reason_code:, targets: }
# where reason_code is the FIRST reason found (check order matters) and
# targets is a deduped, capped list of external ids/usernames. On any
# error, a negative result is returned.
def detect_external_story_attribution_from_item(item:, reel_owner_id:, reel_username:)
  return { has_external_profile_indicator: false, reason_code: nil, targets: [] } unless item.is_a?(Hash)

  reasons = []
  targets = []
  normalized_owner_username = normalize_username(reel_username)

  # 1) Item's embedded owner id disagrees with the reel owner.
  owner_id = (item.dig("owner", "id") || item.dig("owner", "pk")).to_s.strip
  if owner_id.present? && reel_owner_id.to_s.present? && owner_id != reel_owner_id.to_s
    reasons << "owner_id_mismatch"
    targets << owner_id
  end

  # 2) Reshared feed media owned by someone else.
  story_feed_media = Array(item["story_feed_media"])
  if story_feed_media.any?
    sfm_targets = extract_story_feed_media_targets(story_feed_media)
    sfm_external_targets = sfm_targets.select do |target|
      external_story_target?(target, reel_owner_id: reel_owner_id, reel_username: normalized_owner_username)
    end
    if sfm_external_targets.any?
      reasons << "story_feed_media_external"
      targets.concat(sfm_external_targets)
    end
  end

  # 3) Media attribution entries pointing at other users.
  media_attribution_targets = extract_media_attribution_targets(Array(item["media_attributions_data"]))
  external_media_attribution_targets = media_attribution_targets.select do |target|
    external_story_target?(target, reel_owner_id: reel_owner_id, reel_username: normalized_owner_username)
  end
  if external_media_attribution_targets.any?
    reasons << "media_attributions_external"
    targets.concat(external_media_attribution_targets)
  end

  # 4) @-mentions of other users in the reel.
  mention_targets = extract_reel_mention_targets(Array(item["reel_mentions"]))
  external_mention_targets = mention_targets.select do |target|
    external_story_target?(target, reel_owner_id: reel_owner_id, reel_username: normalized_owner_username)
  end
  if external_mention_targets.any?
    reasons << "reel_mentions_external"
    targets.concat(external_mention_targets)
  end

  # 5) Reshare of a text-post-app (Threads) media.
  reasons << "reshare_of_text_post" if item["is_reshare_of_text_post_app_media_in_ig"] == true

  # 6) Embedded username disagrees with the reel owner's.
  owner_username = normalize_username(item.dig("user", "username").to_s)
  if owner_username.present? && normalized_owner_username.present? && owner_username != normalized_owner_username
    reasons << "owner_username_mismatch"
    targets << owner_username
  end

  reason_codes = reasons.uniq
  {
    has_external_profile_indicator: reason_codes.any?,
    reason_code: reason_codes.first,
    targets: targets.map(&:to_s).map(&:strip).reject(&:blank?).uniq.first(12)
  }
rescue StandardError
  { has_external_profile_indicator: false, reason_code: nil, targets: [] }
end
-
-
# True when +target+ (a numeric user id or a username) refers to someone
# other than the reel owner. Blank or unresolvable inputs are never
# considered external; errors resolve to false.
def external_story_target?(target, reel_owner_id:, reel_username:)
  candidate = target.to_s.strip
  return false if candidate.blank?

  if candidate.match?(/\A\d+\z/)
    # Numeric targets compare against the owner's id; without a known
    # owner id we cannot call anything external.
    known_owner = reel_owner_id.to_s.strip
    return known_owner.present? && candidate != known_owner
  end

  owner_handle = normalize_username(reel_username)
  candidate_handle = normalize_username(candidate)
  return false if owner_handle.blank? || candidate_handle.blank?

  candidate_handle != owner_handle
rescue StandardError
  false
end
-
-
# Extracts the owning user (id preferred, else normalized username) of
# each reshared media entry in story_feed_media. When no user node is
# present, falls back to parsing the "<pk>_<owner_id>" media_compound_str.
# Blank results are dropped; errors resolve to [].
def extract_story_feed_media_targets(story_feed_media)
  Array(story_feed_media).filter_map do |entry|
    next unless entry.is_a?(Hash)

    owner_id = (
      entry.dig("media", "user", "id") ||
      entry.dig("media", "user", "pk") ||
      entry.dig("user", "id") ||
      entry.dig("user", "pk")
    ).to_s.strip
    next owner_id if owner_id.present?

    owner_name = normalize_username(
      entry.dig("media", "user", "username").to_s.presence ||
      entry.dig("user", "username").to_s
    )
    next owner_name if owner_name.present?

    compound = entry["media_compound_str"].to_s.strip
    compound.include?("_") ? compound.split("_")[1].to_s.strip : ""
  end.reject(&:blank?)
rescue StandardError
  []
end
-
-
# Maps reel_mentions entries to the mentioned user's id (preferred) or
# normalized username; entries with neither are dropped. Errors -> [].
def extract_reel_mention_targets(reel_mentions)
  Array(reel_mentions).filter_map do |mention|
    next unless mention.is_a?(Hash)

    mentioned_id = (mention.dig("user", "id") || mention.dig("user", "pk") || mention["user_id"]).to_s.strip
    if mentioned_id.present?
      mentioned_id
    else
      handle = normalize_username(mention.dig("user", "username").to_s.presence || mention["username"].to_s)
      handle.presence
    end
  end
rescue StandardError
  []
end
-
-
# Flattens media_attributions_data into a unique, blank-free list of
# candidate user ids/usernames using the recursive collector. Errors -> [].
def extract_media_attribution_targets(media_attributions_data)
  collected = []
  Array(media_attributions_data).each do |entry|
    collect_candidate_user_targets(entry, collected)
  end
  collected.map { |target| target.to_s.strip }.reject(&:blank?).uniq
rescue StandardError
  []
end
-
-
# Recursively walks a nested hash/array structure, appending anything
# that looks like a user reference onto +targets+ (mutated in place):
# known username keys (normalized) and known id keys holding all-digit
# values. Only Hash/Array children are descended into.
def collect_candidate_user_targets(node, targets)
  return if node.nil?

  if node.is_a?(Array)
    node.each { |element| collect_candidate_user_targets(element, targets) }
    return
  end

  return unless node.is_a?(Hash)

  %w[username owner_username mentioned_username].each do |key|
    handle = normalize_username(node[key].to_s)
    targets << handle if handle.present?
  end
  %w[user_id owner_id mentioned_user_id pk id].each do |key|
    raw = node[key].to_s.strip
    targets << raw if raw.match?(/\A\d+\z/)
  end

  node.each_value { |child| collect_candidate_user_targets(child, targets) if child.is_a?(Hash) || child.is_a?(Array) }
end
-
-
# Maps Instagram's numeric media_type to a coarse label: 2 is video,
# everything else (including nil/unknown) is treated as an image.
def story_media_type(value)
  value.to_i == 2 ? "video" : "image"
end
-
-
# Persists the raw reels_media API response (plus summary counts) to
# tmp/story_reel_debug as pretty-printed JSON for troubleshooting.
# Best-effort: any failure is logged and swallowed so debug capture can
# never break the surrounding request.
def debug_story_reel_data(referer_username:, user_id:, body:)
  debug_dir = Rails.root.join("tmp", "story_reel_debug")
  # mkdir_p is idempotent; no separate Dir.exist? check is needed.
  FileUtils.mkdir_p(debug_dir)

  timestamp = Time.current.strftime("%Y%m%d_%H%M%S_%L")
  filepath = File.join(debug_dir, "#{referer_username}_reel_#{user_id}_#{timestamp}.json")

  debug_data = {
    timestamp: Time.current.iso8601,
    referer_username: referer_username,
    user_id: user_id,
    raw_response: body,
    reels_count: body["reels"]&.keys&.size || 0,
    reels_media_count: body["reels_media"]&.size || 0,
    items_count: extract_items_count_from_body(body)
  }

  File.write(filepath, JSON.pretty_generate(debug_data))
  Rails.logger.info "[STORY_REEL_DEBUG] Debug data saved: #{filepath}"
rescue StandardError => e
  # Don't fail the entire request if debug capture fails.
  Rails.logger.error "[STORY_REEL_DEBUG] Failed to capture debug data: #{e.message}"
end
-
-
# Total number of story items across both response shapes: the "reels"
# hash (keyed by reel id) and the "reels_media" array. Entries without a
# proper items array contribute zero.
def extract_items_count_from_body(body)
  reel_groups = []
  reel_groups.concat(body["reels"].values) if body["reels"].is_a?(Hash)
  reel_groups.concat(body["reels_media"]) if body["reels_media"].is_a?(Array)

  reel_groups.sum do |reel|
    reel.is_a?(Hash) && reel["items"].is_a?(Array) ? reel["items"].size : 0
  end
end
-
-
# Best-effort owner id of a reel entry, probing the user/owner sub-hashes
# before falling back to the entry's own id/pk. Returns "" when unknown
# or on any error.
def reel_entry_owner_id(entry)
  return "" unless entry.is_a?(Hash)

  candidates = [
    entry.dig("user", "id"),
    entry.dig("user", "pk"),
    entry.dig("owner", "id"),
    entry.dig("owner", "pk"),
    entry["id"],
    entry["pk"]
  ]
  candidates.find { |value| value }.to_s.strip
rescue StandardError
  ""
end
-
-
# Normalizes one raw feed item into the post hash used for profile
# analysis, including a capped set of comments.
#
# Supported media_type values: 1 (image), 2 (video), 8 (carousel —
# prefers a video child, else the first image child). Anything else
# yields nil. Comments come from the comments API, falling back to the
# item's inline preview_comments. Returns nil on any error or when no
# usable media URL can be derived.
def extract_post_for_analysis(item, comments_limit:, referer_username:)
  return nil unless item.is_a?(Hash)

  media_type = item["media_type"].to_i
  product_type = item["product_type"].to_s.downcase
  post_kind = product_type.include?("clips") ? "reel" : "post"
  # Any of these signals marks the item as a repost/reshare.
  is_repost =
    ActiveModel::Type::Boolean.new.cast(item["is_repost"]) ||
    item.dig("reshared_content", "pk").present? ||
    item["reshare_count"].to_i.positive?
  image_url = nil
  video_url = nil

  if media_type == 1
    image_url = item.dig("image_versions2", "candidates", 0, "url").to_s
  elsif media_type == 2
    video_url = Array(item["video_versions"]).first&.dig("url").to_s
    image_url = item.dig("image_versions2", "candidates", 0, "url").to_s
  elsif media_type == 8
    # Carousel: prefer a video child; use its poster image when present.
    carousel = Array(item["carousel_media"]).select { |m| m.is_a?(Hash) }
    vid = carousel.find { |m| m["media_type"].to_i == 2 }
    img = carousel.find { |m| m["media_type"].to_i == 1 }
    video_url = Array(vid&.dig("video_versions")).first&.dig("url").to_s
    image_url = vid&.dig("image_versions2", "candidates", 0, "url").to_s.presence || img&.dig("image_versions2", "candidates", 0, "url").to_s
  else
    return nil
  end

  # API URLs arrive HTML-escaped (&amp; etc.).
  image_url = CGI.unescapeHTML(image_url).strip
  video_url = CGI.unescapeHTML(video_url).strip
  media_url = video_url.presence || image_url.presence
  return nil if media_url.blank?

  media_pk = item["pk"].presence || item["id"].to_s.split("_").first
  comments = fetch_media_comments(media_id: media_pk, referer_username: referer_username, count: comments_limit)
  comments = extract_preview_comments(item, comments_limit: comments_limit) if comments.empty?

  taken_at = parse_unix_time(item["taken_at"])
  shortcode = (item["code"] || item["shortcode"]).to_s.strip.presence
  permalink = shortcode.present? ? "#{INSTAGRAM_BASE_URL}/p/#{shortcode}/" : nil

  {
    shortcode: shortcode,
    media_id: media_pk.to_s.presence,
    post_kind: post_kind,
    product_type: product_type.presence,
    is_repost: is_repost,
    taken_at: taken_at,
    caption: item.dig("caption", "text").to_s.presence,
    media_url: media_url,
    image_url: image_url,
    video_url: video_url.presence,
    media_type: media_type,
    permalink: permalink,
    likes_count: item["like_count"].to_i,
    comments_count: item["comment_count"].to_i,
    comments: comments
  }
rescue StandardError
  nil
end
-
-
# Maps an item's inline preview_comments to the shared comment shape
# ({author_username:, text:, created_at:}), taking at most
# +comments_limit+ entries. Non-hash entries yield all-nil fields.
def extract_preview_comments(item, comments_limit:)
  Array(item["preview_comments"]).first(comments_limit).map do |raw|
    comment = raw.is_a?(Hash) ? raw : nil
    {
      author_username: comment && comment.dig("user", "username").to_s.strip,
      text: comment && comment["text"].to_s,
      created_at: parse_unix_time(comment && comment["created_at"])
    }
  end
end
-
-
# Fetches up to +count+ (clamped to 1..50) comments for a media id via
# the private comments endpoint, authenticated with the account cookies.
#
# Returns an Array of {author_username:, text:, created_at:} hashes;
# [] when the id is blank, the response is not JSON, or on any error.
def fetch_media_comments(media_id:, referer_username:, count:)
  return [] if media_id.to_s.blank?

  uri = URI.parse("#{INSTAGRAM_BASE_URL}/api/v1/media/#{media_id}/comments/?can_support_threading=true&permalink_enabled=true")

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 10
  http.read_timeout = 20

  req = Net::HTTP::Get.new(uri.request_uri)
  req["User-Agent"] = @account.user_agent.presence || "Mozilla/5.0"
  req["Accept"] = "application/json, text/plain, */*"
  req["X-Requested-With"] = "XMLHttpRequest"
  # Fallback id appears to be the public Instagram web app id.
  req["X-IG-App-ID"] = (@account.auth_snapshot.dig("ig_app_id").presence || "936619743392459")
  req["Referer"] = "#{INSTAGRAM_BASE_URL}/#{referer_username}/"

  csrf = @account.cookies.find { |c| c["name"].to_s == "csrftoken" }&.dig("value").to_s
  req["X-CSRFToken"] = csrf if csrf.present?
  req["Cookie"] = cookie_header_for(@account.cookies)

  res = http.request(req)
  return [] unless res.is_a?(Net::HTTPSuccess)
  # A login wall can answer 200 with HTML — require JSON.
  return [] unless res["content-type"].to_s.include?("json")

  body = JSON.parse(res.body.to_s)
  items = Array(body["comments"]).first(count.to_i.clamp(1, 50))
  items.map do |c|
    {
      author_username: c.dig("user", "username").to_s.strip.presence,
      text: c["text"].to_s,
      created_at: parse_unix_time(c["created_at"])
    }
  end
rescue StandardError
  []
end
-
-
# Best-effort fallback: for posts that report comments but whose direct
# API fetch returned none, re-fetch the comments from inside an
# authenticated browser context (same-origin fetch shares the live
# session). Mutates the matching post hashes in +posts+ in place by
# setting :comments. Always returns nil; all errors are swallowed.
def enrich_missing_post_comments_via_browser!(username:, posts:, comments_limit:)
  # Only posts with an id, a nonzero comment count, and no comments yet.
  target_posts = Array(posts).select do |post|
    post.is_a?(Hash) &&
      post[:media_id].to_s.present? &&
      post[:comments_count].to_i.positive? &&
      Array(post[:comments]).empty?
  end
  return if target_posts.empty?

  with_recoverable_session(label: "profile_analysis_comments_fallback") do
    with_authenticated_driver do |driver|
      # Land on the profile first so the in-page fetch has a valid origin/session.
      driver.navigate.to("#{INSTAGRAM_BASE_URL}/#{username}/")
      wait_for(driver, css: "body", timeout: 10)
      dismiss_common_overlays!(driver)

      target_posts.each do |post|
        comments = fetch_media_comments_from_browser_context(
          driver: driver,
          media_id: post[:media_id],
          count: comments_limit
        )
        next if comments.empty?

        post[:comments] = comments
      rescue StandardError
        # One post failing must not stop enrichment of the rest.
        next
      end
    end
  end
rescue StandardError
  nil
end
-
-
# Fetches comments for +media_id+ by running a same-origin fetch() inside
# the authenticated browser page (execute_async_script), so the request
# rides on the live session cookies. Returns up to +count+ (clamped to
# 1..50) {author_username:, text:, created_at:} hashes; [] on any
# failure, non-OK status, or non-JSON response.
def fetch_media_comments_from_browser_context(driver:, media_id:, count:)
  payload =
    driver.execute_async_script(
      <<~JS,
        const mediaId = arguments[0];
        const limit = arguments[1];
        const done = arguments[arguments.length - 1];

        fetch(`/api/v1/media/${mediaId}/comments/?can_support_threading=true&permalink_enabled=true`, {
          method: "GET",
          credentials: "include",
          headers: {
            "Accept": "application/json, text/plain, */*",
            "X-Requested-With": "XMLHttpRequest"
          }
        })
        .then(async (resp) => {
          const text = await resp.text();
          done({
            ok: resp.ok,
            status: resp.status,
            content_type: resp.headers.get("content-type") || "",
            body: text
          });
        })
        .catch((err) => {
          done({ ok: false, status: 0, content_type: "", body: "", error: String(err) });
        });
      JS
      media_id.to_s,
      count.to_i.clamp(1, 50)
    )

  # Selenium returns the JS callback argument as a Hash with string keys.
  return [] unless payload.is_a?(Hash)
  return [] unless payload["ok"] == true
  return [] unless payload["content_type"].to_s.include?("json")

  body = JSON.parse(payload["body"].to_s)
  items = Array(body["comments"]).first(count.to_i.clamp(1, 50))
  items.map do |c|
    {
      author_username: c.dig("user", "username").to_s.strip.presence,
      text: c["text"].to_s,
      created_at: parse_unix_time(c["created_at"])
    }
  end
rescue StandardError
  []
end
-
end
-
end
-
end
-
module Instagram
-
class Client
-
module StoryScraperService
-
# Homepage carousel-based story sync:
-
# - open home page and story tray
-
# - process up to N stories from carousel using "Next" button
-
# - for image stories: download, store, analyze, (optional) post generated comment
-
# - for video stories: download + persist to ActiveStorage and ingest for later processing
-
# - capture per-step HTML/JSON/screenshot artifacts for DOM troubleshooting
-
# @param story_limit [Integer] max stories to process (clamped to 1..50)
# @param auto_reply_only [Boolean-ish] when truthy, only profiles with the
#   auto-reply tag receive generated comments (everything else is still
#   downloaded/analyzed but the reply is skipped)
# @return [Hash] per-run stats counters (stories_visited, downloaded, etc.)
def sync_home_story_carousel!(story_limit: 10, auto_reply_only: false)
  limit = story_limit.to_i.clamp(1, 50)
  tagged_only = ActiveModel::Type::Boolean.new.cast(auto_reply_only)

  with_recoverable_session(label: "sync_home_story_carousel") do
    with_authenticated_driver do |driver|
      with_task_capture(
        driver: driver,
        task_name: "home_story_sync_start",
        meta: { story_limit: limit, auto_reply_only: tagged_only }
      ) do
        # Open home page and the first story of the tray.
        driver.navigate.to(INSTAGRAM_BASE_URL)
        wait_for(driver, css: "body", timeout: 12)
        dismiss_common_overlays!(driver)
        capture_task_html(driver: driver, task_name: "home_story_sync_home_loaded", status: "ok")

        open_first_story_from_home_carousel!(driver: driver)

        wait_for(driver, css: "body", timeout: 12)
        freeze_story_progress!(driver)
        capture_task_html(driver: driver, task_name: "home_story_sync_opened_first_story", status: "ok")

        stats = { stories_visited: 0, downloaded: 0, analyzed: 0, commented: 0, reacted: 0, skipped_video: 0, skipped_not_tagged: 0, skipped_ads: 0, skipped_invalid_media: 0, skipped_unreplyable: 0, skipped_out_of_network: 0, skipped_interaction_retry: 0, skipped_reshared_external_link: 0, failed: 0 }
        visited_refs = {}
        story_api_cache = {}
        # Allow several skips per counted story before giving up on the loop.
        safety_limit = limit * 5
        exit_reason = "safety_limit_exhausted"
        account_profile = find_or_create_profile_for_auto_engagement!(username: @account.username)
        started_at = Time.current
        account_profile.record_event!(
          kind: "story_sync_started",
          external_id: "story_sync_started:home_carousel:#{started_at.utc.iso8601(6)}",
          occurred_at: started_at,
          metadata: { source: "home_story_carousel", story_limit: limit, auto_reply_only: tagged_only }
        )

        safety_limit.times do
          if stats[:stories_visited] >= limit
            exit_reason = "limit_reached"
            break
          end

          # Resolve the story currently on screen; retry once after URL recovery.
          context = normalized_story_context_for_processing(driver: driver, context: current_story_context(driver))
          if context[:story_url_recovery_needed]
            recover_story_url_context!(driver: driver, username: context[:username], reason: "fallback_profile_url")
            context = normalized_story_context_for_processing(driver: driver, context: current_story_context(driver))
          end

          ref = context[:ref].presence || context[:story_key].to_s
          if ref.blank?
            capture_task_html(
              driver: driver,
              task_name: "home_story_sync_story_context_missing",
              status: "error",
              meta: {
                current_url: driver.current_url.to_s,
                page_title: driver.title.to_s,
                resolved_username: context[:username],
                resolved_story_id: context[:story_id]
              }
            )
            fallback_username = context[:username].presence || @account.username.to_s
            if fallback_username.present?
              fallback_profile = find_or_create_profile_for_auto_engagement!(username: fallback_username)
              fallback_profile.record_event!(
                kind: "story_sync_failed",
                external_id: "story_sync_failed:context_missing:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: {
                  source: "home_story_carousel",
                  reason: "story_context_missing",
                  current_url: driver.current_url.to_s,
                  page_title: driver.title.to_s
                }
              )
            end
            exit_reason = "story_context_missing"
            break
          end
          # Dedupe on story_key: re-seeing one means the viewer did not advance.
          story_key = context[:story_key].presence || ref
          if visited_refs[story_key]
            capture_task_html(
              driver: driver,
              task_name: "home_story_sync_duplicate_story_key",
              status: "error",
              meta: {
                story_key: story_key,
                ref: ref,
                current_url: driver.current_url.to_s
              }
            )
            moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
            if moved
              next
            end
            exit_reason = "duplicate_story_key_no_progress"
            break
          end
          visited_refs[story_key] = true
          # Story id resolution cascade: context, then ref token, then URL token.
          story_id = normalize_story_id_token(context[:story_id])
          story_id = normalize_story_id_token(ref.to_s.split(":")[1].to_s) if story_id.blank?
          story_id = normalize_story_id_token(current_story_reference(driver.current_url.to_s).to_s.split(":")[1].to_s) if story_id.blank?
          story_url = canonical_story_url(
            username: context[:username],
            story_id: story_id,
            fallback_url: driver.current_url.to_s
          )

          stats[:stories_visited] += 1
          freeze_story_progress!(driver)
          capture_task_html(
            driver: driver,
            task_name: "home_story_sync_story_loaded",
            status: "ok",
            meta: { ref: ref, story_key: story_key, username: context[:username], story_id: story_id, current_url: story_url }
          )

          if story_id.blank?
            stats[:failed] += 1
            fallback_profile = find_or_create_profile_for_auto_engagement!(username: context[:username].presence || @account.username.to_s)
            fallback_profile.record_event!(
              kind: "story_sync_failed",
              external_id: "story_sync_failed:missing_story_id:#{Time.current.utc.iso8601(6)}",
              occurred_at: Time.current,
              metadata: {
                source: "home_story_carousel",
                reason: "story_id_unresolved",
                story_ref: ref,
                story_key: story_key,
                story_url: story_url
              }
            )
            moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
            unless moved
              exit_reason = "next_navigation_failed"
              break
            end
            next
          end

          # Gate 1: only engage profiles already known to our network.
          profile = find_story_network_profile(username: context[:username])
          if profile.nil?
            stats[:skipped_out_of_network] += 1
            account_profile.record_event!(
              kind: "story_reply_skipped",
              external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
              occurred_at: Time.current,
              metadata: {
                source: "home_story_carousel",
                story_id: story_id,
                story_ref: ref,
                story_url: story_url,
                reason: "profile_not_in_network",
                status: "Out of network",
                username: context[:username].to_s
              }
            )
            capture_task_html(
              driver: driver,
              task_name: "home_story_sync_out_of_network_skipped",
              status: "ok",
              meta: {
                story_id: story_id,
                story_ref: ref,
                username: context[:username].to_s,
                reason: "profile_not_in_network"
              }
            )
            moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
            unless moved
              exit_reason = "next_navigation_failed"
              break
            end
            next
          end

          # Gate 2: respect the retry cooldown for profiles whose story
          # interactions were previously marked unavailable.
          if profile_interaction_retry_pending?(profile)
            stats[:skipped_interaction_retry] += 1
            stats[:skipped_unreplyable] += 1
            profile.record_event!(
              kind: "story_reply_skipped",
              external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
              occurred_at: Time.current,
              metadata: {
                source: "home_story_carousel",
                story_id: story_id,
                story_ref: ref,
                story_url: story_url,
                reason: "interaction_retry_window_active",
                status: "Interaction unavailable (retry pending)",
                retry_after_at: profile.story_interaction_retry_after_at&.iso8601,
                interaction_state: profile.story_interaction_state.to_s,
                interaction_reason: profile.story_interaction_reason.to_s
              }
            )
            capture_task_html(
              driver: driver,
              task_name: "home_story_sync_interaction_retry_skipped",
              status: "ok",
              meta: {
                story_id: story_id,
                story_ref: ref,
                retry_after_at: profile.story_interaction_retry_after_at&.iso8601,
                interaction_state: profile.story_interaction_state.to_s,
                interaction_reason: profile.story_interaction_reason.to_s
              }
            )
            moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
            unless moved
              exit_reason = "next_navigation_failed"
              break
            end
            next
          end

          # Resolve the story's media (URL, type, dimensions) from API/DOM.
          media = resolve_story_media_for_current_context(
            driver: driver,
            username: context[:username],
            story_id: story_id,
            fallback_story_key: story_key,
            cache: story_api_cache
          )
          if media[:url].to_s.blank?
            stats[:failed] += 1
            profile.record_event!(
              kind: "story_sync_failed",
              external_id: "story_sync_failed:#{story_id}:#{Time.current.utc.iso8601(6)}",
              occurred_at: Time.current,
              metadata: {
                source: "home_story_carousel",
                reason: "api_story_media_unavailable",
                story_id: story_id,
                story_ref: ref,
                story_url: story_url,
                media_source: media[:source].to_s,
                media_variant_count: media[:media_variant_count].to_i
              }
            )
            moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
            unless moved
              exit_reason = "next_navigation_failed"
              break
            end
            next
          end

          # Guard against processing media that belongs to a different story id
          # (stale cache / viewer already advanced under us).
          media_story_id_hint = story_id_hint_from_media_url(media[:url])
          if media_story_id_hint.present? && media_story_id_hint != story_id
            stats[:failed] += 1
            profile.record_event!(
              kind: "story_sync_failed",
              external_id: "story_sync_failed:#{story_id}:#{Time.current.utc.iso8601(6)}",
              occurred_at: Time.current,
              metadata: {
                source: "home_story_carousel",
                reason: "story_media_story_id_mismatch",
                expected_story_id: story_id,
                media_story_id: media_story_id_hint,
                story_ref: ref,
                story_url: story_url,
                media_source: media[:source].to_s,
                media_url: media[:url].to_s
              }
            )
            moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
            unless moved
              exit_reason = "next_navigation_failed"
              break
            end
            next
          end
          # Gate 3: skip sponsored stories.
          ad_context = detect_story_ad_context(driver: driver, media: media)
          capture_task_html(
            driver: driver,
            task_name: "home_story_sync_story_probe",
            status: "ok",
            meta: {
              story_id: story_id,
              story_ref: ref,
              story_key: story_key,
              username: context[:username],
              ad_detected: ad_context[:ad_detected],
              ad_reason: ad_context[:reason],
              ad_marker_text: ad_context[:marker_text],
              ad_signal_source: ad_context[:signal_source],
              ad_signal_confidence: ad_context[:signal_confidence],
              ad_debug_hint: ad_context[:debug_hint],
              media_source: media[:source],
              media_type: media[:media_type],
              media_url: media[:url].to_s.byteslice(0, 500),
              media_width: media[:width],
              media_height: media[:height],
              media_variant_count: media[:media_variant_count].to_i,
              primary_media_source: media[:primary_media_source].to_s,
              primary_media_index: media[:primary_media_index],
              carousel_media_count: Array(media[:carousel_media]).length
            }
          )
          if ad_context[:ad_detected]
            stats[:skipped_ads] += 1
            profile.record_event!(
              kind: "story_ad_skipped",
              external_id: "story_ad_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
              occurred_at: Time.current,
              metadata: {
                source: "home_story_carousel",
                story_id: story_id,
                story_ref: ref,
                story_url: story_url,
                reason: ad_context[:reason],
                marker_text: ad_context[:marker_text]
              }
            )
            capture_task_html(
              driver: driver,
              task_name: "home_story_sync_ad_skipped",
              status: "ok",
              meta: {
                story_id: story_id,
                story_ref: ref,
                reason: ad_context[:reason],
                marker_text: ad_context[:marker_text]
              }
            )
            moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
            unless moved
              exit_reason = "next_navigation_failed"
              break
            end
            next
          end

          # Gate 4: skip stories that are reshares attributed to an external
          # profile (API-detected).
          api_external_context = story_external_profile_link_context_from_api(
            username: context[:username],
            story_id: story_id,
            cache: story_api_cache
          )
          if api_external_context[:known] && api_external_context[:has_external_profile_link]
            stats[:skipped_reshared_external_link] += 1
            profile.record_event!(
              kind: "story_reply_skipped",
              external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
              occurred_at: Time.current,
              metadata: {
                source: "home_story_carousel",
                story_id: story_id,
                story_ref: ref,
                story_url: story_url,
                reason: api_external_context[:reason_code].to_s.presence || "api_external_profile_indicator",
                status: "External attribution detected (API)",
                linked_username: api_external_context[:linked_username],
                linked_profile_url: api_external_context[:linked_profile_url],
                marker_text: api_external_context[:marker_text],
                linked_targets: Array(api_external_context[:linked_targets])
              }
            )
            capture_task_html(
              driver: driver,
              task_name: "home_story_sync_external_profile_link_skipped",
              status: "ok",
              meta: {
                story_id: story_id,
                story_ref: ref,
                linked_username: api_external_context[:linked_username],
                linked_profile_url: api_external_context[:linked_profile_url],
                marker_text: api_external_context[:marker_text],
                linked_targets: Array(api_external_context[:linked_targets]),
                reason_code: api_external_context[:reason_code]
              }
            )
            moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
            unless moved
              exit_reason = "next_navigation_failed"
              break
            end
            next
          end

          # Gate 5: API says replies are disabled -> mark profile and cool down.
          api_reply_gate = story_reply_capability_from_api(username: context[:username], story_id: story_id)
          if api_reply_gate[:known] && api_reply_gate[:reply_possible] == false
            stats[:skipped_unreplyable] += 1
            retry_after = Time.current + STORY_INTERACTION_RETRY_DAYS.days
            mark_profile_interaction_state!(
              profile: profile,
              state: "unavailable",
              reason: api_reply_gate[:reason_code].to_s.presence || "api_can_reply_false",
              reaction_available: false,
              retry_after_at: retry_after
            )
            profile.record_event!(
              kind: "story_reply_skipped",
              external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
              occurred_at: Time.current,
              metadata: {
                source: "home_story_carousel",
                story_id: story_id,
                story_ref: ref,
                story_url: story_url,
                reason: api_reply_gate[:reason_code],
                status: api_reply_gate[:status],
                retry_after_at: retry_after.iso8601
              }
            )
            moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
            unless moved
              exit_reason = "next_navigation_failed"
              break
            end
            next
          end

          # Gate 6: confirm a reply box exists (API answer trusted when known,
          # otherwise probed via the DOM). If replying is impossible, fall back
          # to a quick reaction where available.
          reply_gate =
            if api_reply_gate[:known] && api_reply_gate[:reply_possible] == true
              { reply_possible: true, reason_code: nil, status: api_reply_gate[:status], marker_text: "", submission_reason: "api_can_reply_true" }
            else
              check_story_reply_capability(driver: driver)
            end
          unless reply_gate[:reply_possible]
            reaction_result = react_to_story_if_available!(driver: driver)
            if reaction_result[:reacted]
              stats[:reacted] += 1
              mark_profile_interaction_state!(
                profile: profile,
                state: "reaction_only",
                reason: reply_gate[:reason_code].to_s.presence || "reply_unavailable_reaction_available",
                reaction_available: true
              )
              profile.record_event!(
                kind: "story_reaction_sent",
                external_id: "story_reaction_sent:#{story_id}:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: {
                  source: "home_story_carousel",
                  story_id: story_id,
                  story_ref: ref,
                  story_url: story_url,
                  reaction_reason: reaction_result[:reason],
                  reaction_marker_text: reaction_result[:marker_text],
                  reply_gate_reason: reply_gate[:reason_code]
                }
              )
              capture_task_html(
                driver: driver,
                task_name: "home_story_sync_reaction_fallback_sent",
                status: "ok",
                meta: {
                  story_id: story_id,
                  story_ref: ref,
                  reaction_reason: reaction_result[:reason],
                  reaction_marker_text: reaction_result[:marker_text],
                  reply_gate_reason: reply_gate[:reason_code]
                }
              )
              moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
              unless moved
                exit_reason = "next_navigation_failed"
                break
              end
              next
            end

            stats[:skipped_unreplyable] += 1
            retry_after = Time.current + STORY_INTERACTION_RETRY_DAYS.days
            mark_profile_interaction_state!(
              profile: profile,
              state: "unavailable",
              reason: reply_gate[:reason_code].to_s.presence || "reply_unavailable",
              reaction_available: false,
              retry_after_at: retry_after
            )
            profile.record_event!(
              kind: "story_reply_skipped",
              external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
              occurred_at: Time.current,
              metadata: {
                source: "home_story_carousel",
                story_id: story_id,
                story_ref: ref,
                story_url: story_url,
                reason: reply_gate[:reason_code],
                status: reply_gate[:status],
                submission_reason: reply_gate[:submission_reason],
                submission_marker_text: reply_gate[:marker_text],
                retry_after_at: retry_after.iso8601,
                reaction_fallback_attempted: true,
                reaction_fallback_reason: reaction_result[:reason],
                reaction_fallback_marker_text: reaction_result[:marker_text]
              }
            )
            capture_task_html(
              driver: driver,
              task_name: "home_story_sync_reply_precheck_skipped",
              status: "ok",
              meta: {
                story_id: story_id,
                story_ref: ref,
                reason: reply_gate[:reason_code],
                status_text: reply_gate[:status],
                marker_text: reply_gate[:marker_text],
                retry_after_at: retry_after.iso8601,
                reaction_fallback_reason: reaction_result[:reason],
                reaction_fallback_marker_text: reaction_result[:marker_text]
              }
            )
            moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
            unless moved
              exit_reason = "next_navigation_failed"
              break
            end
            next
          end
          mark_profile_interaction_state!(
            profile: profile,
            state: "reply_available",
            reason: "reply_box_found",
            reaction_available: nil,
            retry_after_at: nil
          )

          story_time = Time.current
          profile.record_event!(
            kind: "story_uploaded",
            external_id: "story_uploaded:#{story_id}",
            occurred_at: nil,
            metadata: {
              source: "home_story_carousel",
              story_id: story_id,
              story_ref: ref,
              story_url: story_url
            }
          )
          profile.record_event!(
            kind: "story_viewed",
            external_id: "story_viewed:#{story_id}:#{story_time.utc.iso8601(6)}",
            occurred_at: story_time,
            metadata: {
              source: "home_story_carousel",
              story_id: story_id,
              story_ref: ref,
              story_url: story_url
            }
          )

          # Video stories: download + ingest for later processing, no comment.
          if media[:media_type].to_s == "video"
            begin
              download = download_media_with_metadata(url: media[:url], user_agent: @account.user_agent)
              stats[:downloaded] += 1
              now = Time.current
              downloaded_event = profile.record_event!(
                kind: "story_downloaded",
                external_id: "story_downloaded:#{story_id}:#{now.utc.iso8601(6)}",
                occurred_at: now,
                metadata: {
                  source: "home_story_carousel",
                  story_id: story_id,
                  story_ref: ref,
                  story_url: story_url,
                  media_type: "video",
                  media_source: media[:source],
                  media_url: media[:url],
                  image_url: media[:image_url],
                  video_url: media[:video_url],
                  media_width: media[:width],
                  media_height: media[:height],
                  owner_user_id: media[:owner_user_id],
                  owner_username: media[:owner_username],
                  api_media_variant_count: media[:media_variant_count].to_i,
                  api_primary_media_source: media[:primary_media_source].to_s,
                  api_primary_media_index: media[:primary_media_index],
                  api_carousel_media: compact_story_media_variants_for_metadata(media[:carousel_media]),
                  media_content_type: download[:content_type],
                  media_bytes: download[:bytes].bytesize
                }
              )
              downloaded_event.media.attach(io: StringIO.new(download[:bytes]), filename: download[:filename], content_type: download[:content_type])
              InstagramProfileEvent.broadcast_story_archive_refresh!(account: @account)
              StoryIngestionService.new(account: @account, profile: profile).ingest!(
                story: {
                  story_id: story_id,
                  media_type: "video",
                  media_url: media[:url],
                  image_url: nil,
                  video_url: media[:url],
                  caption: nil,
                  permalink: story_url,
                  taken_at: story_time
                },
                source_event: downloaded_event,
                bytes: download[:bytes],
                content_type: download[:content_type],
                filename: download[:filename]
              )
            rescue StandardError => e
              stats[:failed] += 1
              profile.record_event!(
                kind: "story_sync_failed",
                external_id: "story_sync_failed:#{story_id}:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: { source: "home_story_carousel", story_ref: ref, error_class: e.class.name, error_message: e.message }
              )
            end
            stats[:skipped_video] += 1
            # BUGFIX: align with every other branch — a failed "next" navigation
            # ends the loop instead of silently retrying. (The previous
            # `next unless click_... ; next` continued in both cases, so a dead
            # viewer made the loop spin on the same story until the safety
            # limit was exhausted.)
            moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
            unless moved
              exit_reason = "next_navigation_failed"
              break
            end
            next
          end

          # Gate 7: never reply twice to the same story.
          duplicate_reply = story_already_replied?(
            profile: profile,
            story_id: story_id,
            story_ref: ref,
            story_url: story_url,
            media_url: media[:url]
          )
          if duplicate_reply[:found]
            profile.record_event!(
              kind: "story_reply_skipped",
              external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
              occurred_at: Time.current,
              metadata: {
                source: "home_story_carousel",
                story_id: story_id,
                story_ref: ref,
                story_url: story_url,
                reason: "duplicate_story_already_replied",
                matched_by: duplicate_reply[:matched_by],
                matched_event_external_id: duplicate_reply[:matched_external_id]
              }
            )
            capture_task_html(
              driver: driver,
              task_name: "home_story_sync_duplicate_reply_skipped",
              status: "ok",
              meta: {
                story_id: story_id,
                story_ref: ref,
                matched_by: duplicate_reply[:matched_by],
                matched_event_external_id: duplicate_reply[:matched_external_id]
              }
            )
            moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
            unless moved
              exit_reason = "next_navigation_failed"
              break
            end
            next
          end

          # Image stories: download, quality-check, store, analyze, and
          # (subject to tagging rules) post a generated comment.
          begin
            download = download_media_with_metadata(url: media[:url], user_agent: @account.user_agent)
            stats[:downloaded] += 1
            quality = evaluate_story_image_quality(download: download, media: media)
            if quality[:skip]
              stats[:skipped_invalid_media] += 1
              profile.record_event!(
                kind: "story_reply_skipped",
                external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: {
                  source: "home_story_carousel",
                  story_id: story_id,
                  story_ref: ref,
                  story_url: story_url,
                  reason: "invalid_story_media",
                  quality_reason: quality[:reason],
                  quality_entropy: quality[:entropy],
                  media_type: media[:media_type],
                  media_width: media[:width],
                  media_height: media[:height],
                  media_content_type: download[:content_type],
                  media_bytes: download[:bytes].bytesize
                }
              )
              capture_task_html(
                driver: driver,
                task_name: "home_story_sync_invalid_media_skipped",
                status: "ok",
                meta: {
                  story_id: story_id,
                  story_ref: ref,
                  quality_reason: quality[:reason],
                  quality_entropy: quality[:entropy],
                  media_content_type: download[:content_type],
                  media_bytes: download[:bytes].bytesize
                }
              )
              moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
              unless moved
                exit_reason = "next_navigation_failed"
                break
              end
              next
            end
            now = Time.current
            downloaded_event = profile.record_event!(
              kind: "story_downloaded",
              external_id: "story_downloaded:#{story_id}:#{now.utc.iso8601(6)}",
              occurred_at: now,
              metadata: {
                source: "home_story_carousel",
                story_id: story_id,
                story_ref: ref,
                story_url: story_url,
                media_type: "image",
                media_source: media[:source],
                media_url: media[:url],
                image_url: media[:image_url],
                video_url: media[:video_url],
                media_width: media[:width],
                media_height: media[:height],
                owner_user_id: media[:owner_user_id],
                owner_username: media[:owner_username],
                api_media_variant_count: media[:media_variant_count].to_i,
                api_primary_media_source: media[:primary_media_source].to_s,
                api_primary_media_index: media[:primary_media_index],
                api_carousel_media: compact_story_media_variants_for_metadata(media[:carousel_media]),
                media_content_type: download[:content_type],
                media_bytes: download[:bytes].bytesize
              }
            )
            downloaded_event.media.attach(io: StringIO.new(download[:bytes]), filename: download[:filename], content_type: download[:content_type])
            InstagramProfileEvent.broadcast_story_archive_refresh!(account: @account)

            payload = build_auto_engagement_post_payload(
              profile: profile,
              shortcode: story_id,
              caption: nil,
              permalink: story_url,
              include_story_history: true
            )
            analysis = analyze_for_auto_engagement!(
              analyzable: downloaded_event,
              payload: payload,
              bytes: download[:bytes],
              content_type: download[:content_type],
              source_url: media[:url]
            )
            stats[:analyzed] += 1 if analysis.present?

            suggestions = generate_comment_suggestions_from_analysis!(profile: profile, payload: payload, analysis: analysis)
            comment_text = suggestions.first.to_s.strip
            capture_task_html(
              driver: driver,
              task_name: "home_story_sync_comment_generation",
              status: comment_text.present? ? "ok" : "error",
              meta: { story_ref: ref, suggestions_count: suggestions.length, comment_preview: comment_text.byteslice(0, 120) }
            )

            if tagged_only && !profile_auto_reply_enabled?(profile)
              stats[:skipped_not_tagged] += 1
              profile.record_event!(
                kind: "story_reply_skipped",
                external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: { source: "home_story_carousel", story_id: story_id, story_ref: ref, story_url: story_url, reason: "missing_auto_reply_tag" }
              )
            elsif comment_text.blank?
              profile.record_event!(
                kind: "story_reply_skipped",
                external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
                occurred_at: Time.current,
                metadata: { source: "home_story_carousel", story_id: story_id, story_ref: ref, story_url: story_url, reason: "no_comment_generated" }
              )
            else
              # Prefer the API submission path; fall back to UI typing.
              comment_result = comment_on_story_via_api!(
                story_id: story_id,
                story_username: context[:username],
                comment_text: comment_text
              )
              if !comment_result[:posted]
                comment_result = comment_on_story_via_ui!(driver: driver, comment_text: comment_text)
              end
              posted = comment_result[:posted]
              skip_status = story_reply_skip_status_for(comment_result)
              capture_task_html(
                driver: driver,
                task_name: "home_story_sync_comment_submission",
                status: posted ? "ok" : "error",
                meta: {
                  story_ref: ref,
                  comment_preview: comment_text.byteslice(0, 120),
                  posted: posted,
                  submission_method: comment_result[:method],
                  failure_reason: comment_result[:reason],
                  skip_status: skip_status[:status],
                  skip_reason_code: skip_status[:reason_code]
                }
              )
              if posted
                stats[:commented] += 1
                mark_profile_interaction_state!(
                  profile: profile,
                  state: "reply_available",
                  reason: "comment_sent",
                  reaction_available: nil,
                  retry_after_at: nil
                )
                profile.record_event!(
                  kind: "story_reply_sent",
                  external_id: "story_reply_sent:#{story_id}",
                  occurred_at: Time.current,
                  metadata: {
                    source: "home_story_carousel",
                    story_id: story_id,
                    story_ref: ref,
                    story_url: story_url,
                    media_url: media[:url],
                    comment_text: comment_text,
                    submission_method: comment_result[:method]
                  }
                )
                attach_reply_comment_to_downloaded_event!(downloaded_event: downloaded_event, comment_text: comment_text)
              else
                profile.record_event!(
                  kind: "story_reply_skipped",
                  external_id: "story_reply_skipped:#{story_id}:#{Time.current.utc.iso8601(6)}",
                  occurred_at: Time.current,
                  metadata: {
                    source: "home_story_carousel",
                    story_id: story_id,
                    story_ref: ref,
                    story_url: story_url,
                    reason: skip_status[:reason_code],
                    status: skip_status[:status],
                    submission_reason: comment_result[:reason],
                    submission_marker_text: comment_result[:marker_text]
                  }
                )
              end
            end
          rescue StandardError => e
            stats[:failed] += 1
            profile.record_event!(
              kind: "story_sync_failed",
              external_id: "story_sync_failed:#{story_id}:#{Time.current.utc.iso8601(6)}",
              occurred_at: Time.current,
              metadata: { source: "home_story_carousel", story_id: story_id, story_ref: ref, story_url: story_url, error_class: e.class.name, error_message: e.message }
            )
          end

          moved = click_next_story_in_carousel!(driver: driver, current_ref: ref)
          unless moved
            exit_reason = "next_navigation_failed"
            break
          end
        end

        # Record an explicit failure when the loop never processed a story.
        if stats[:stories_visited].zero?
          stats[:failed] += 1
          capture_task_html(
            driver: driver,
            task_name: "home_story_sync_no_progress",
            status: "error",
            meta: {
              reason: "loop_exited_without_story_processing",
              current_url: driver.current_url.to_s,
              page_title: driver.title.to_s,
              stats: stats
            }
          )
          account_profile.record_event!(
            kind: "story_sync_failed",
            external_id: "story_sync_failed:no_progress:#{Time.current.utc.iso8601(6)}",
            occurred_at: Time.current,
            metadata: {
              source: "home_story_carousel",
              reason: "loop_exited_without_story_processing",
              current_url: driver.current_url.to_s,
              page_title: driver.title.to_s
            }
          )
        end
        capture_task_html(
          driver: driver,
          task_name: "home_story_sync_end_state",
          status: "ok",
          meta: {
            reason: exit_reason,
            story_limit: limit,
            stats: stats,
            current_url: driver.current_url.to_s
          }
        )
        account_profile.record_event!(
          kind: "story_sync_completed",
          external_id: "story_sync_completed:home_carousel:#{Time.current.utc.iso8601(6)}",
          occurred_at: Time.current,
          metadata: {
            source: "home_story_carousel",
            story_limit: limit,
            auto_reply_only: tagged_only,
            stats: stats,
            end_reason: exit_reason
          }
        )

        stats
      end
    end
  end
end
-
# Opens the first active story from the home-page story tray, retrying for up
# to 45 seconds. On each attempt it probes the tray for clickable targets and
# tries, in order: a native Selenium click on the probed element, a page-JS
# click fallback (for builds that rerender and invalidate element handles),
# and a direct story-route navigation built from prefetched usernames.
# Usernames that resolve to a /live/ page are excluded from later probes.
# Returns true once the story viewer is confirmed open; raises RuntimeError
# with the last probe counters when the deadline expires without success.
def open_first_story_from_home_carousel!(driver:)
  started_at = Time.current
  deadline = started_at + 45.seconds # Further increased timeout
  attempts = 0
  last_probe = {}
  # Ensures the direct prefetch-username route is tried at most once per call.
  prefetch_route_attempted = false
  excluded_usernames = []

  while Time.current < deadline
    attempts += 1
    dismiss_common_overlays!(driver)

    # Force scroll to ensure stories are loaded
    if attempts == 1
      begin
        driver.execute_script("window.scrollTo(0, 0);")
        sleep(1.0)
      rescue StandardError
        nil
      end
    end

    # Check if we're on the right page
    current_url = driver.current_url.to_s
    if !current_url.include?("instagram.com") && !current_url.include?(INSTAGRAM_BASE_URL)
      Rails.logger.warn "Not on Instagram page, redirecting. Current URL: #{current_url}" if defined?(Rails)
      begin
        driver.navigate.to(INSTAGRAM_BASE_URL)
        wait_for(driver, css: "body", timeout: 12)
        dismiss_common_overlays!(driver)
        sleep(2.0)
        next
      rescue StandardError => e
        Rails.logger.error "Failed to redirect to Instagram: #{e.message}" if defined?(Rails)
        next
      end
    end

    probe = detect_home_story_carousel_probe(driver, excluded_usernames: excluded_usernames)
    last_probe = probe

    # Enhanced debugging for failed story detection: capture on the first
    # attempt, every third attempt, and whenever all probe counters are zero.
    if attempts == 1 || (attempts % 3 == 0) || (probe[:target_count].to_i.zero? && probe[:anchor_count].to_i.zero? && probe[:prefetch_count].to_i.zero?)
      capture_task_html(
        driver: driver,
        task_name: "home_story_sync_debug_probe",
        status: "ok",
        meta: {
          attempts: attempts,
          target_count: probe[:target_count],
          anchor_count: probe[:anchor_count],
          prefetch_count: probe[:prefetch_count],
          target_strategy: probe[:target_strategy],
          debug_info: probe[:debug],
          page_debug: probe[:page_debug],
          current_url: current_url,
          all_zero: probe[:target_count].to_i.zero? && probe[:anchor_count].to_i.zero? && probe[:prefetch_count].to_i.zero?
        }
      )
    end

    # Aggressive prefetch route attempt when no elements found
    if !prefetch_route_attempted && attempts >= 2 && (probe[:anchor_count].to_i.zero? || probe[:target_count].to_i.zero?) && Array(probe[:prefetch_usernames]).present?
      prefetch_route_attempted = true
      opened = open_story_from_prefetch_usernames(
        driver: driver,
        usernames: Array(probe[:prefetch_usernames]),
        attempts: attempts,
        probe: probe
      )
      return true if opened
    end

    # Try direct navigation if no stories found after multiple attempts
    if attempts >= 6 && probe[:target_count].to_i.zero? && probe[:anchor_count].to_i.zero? && probe[:prefetch_count].to_i.zero?
      # Try to navigate to stories directly as last resort
      begin
        Rails.logger.info "No stories found, attempting refresh and retry" if defined?(Rails)
        driver.navigate.to("#{INSTAGRAM_BASE_URL}/")
        wait_for(driver, css: "body", timeout: 12)
        dismiss_common_overlays!(driver)
        sleep(2.0)
        next
      rescue StandardError
        nil
      end
    end

    # Primary path: click the probed element handle (native click first,
    # JS click as a fallback when the native click raises).
    target = probe[:target]
    if target
      clicked_target = false
      begin
        driver.action.move_to(target).click.perform
        clicked_target = true
      rescue StandardError
        begin
          js_click(driver, target)
          clicked_target = true
        rescue StandardError
          clicked_target = false
        end
      end

      if clicked_target
        sleep(0.8)
        dom = extract_story_dom_context(driver)
        unless story_viewer_ready?(dom)
          # Click landed somewhere other than the story viewer; if it was a
          # live stream, exclude that username from future probes.
          current_url = driver.current_url.to_s
          if current_url.include?("/live/")
            live_username = extract_username_from_profile_like_path(current_url)
            excluded_usernames << live_username if live_username.present? && !excluded_usernames.include?(live_username)
          end

          capture_task_html(
            driver: driver,
            task_name: "home_story_sync_first_story_opened",
            status: "error",
            meta: {
              strategy: probe[:target_strategy],
              attempts: attempts,
              target_count: probe[:target_count],
              anchor_count: probe[:anchor_count],
              prefetch_story_usernames: probe[:prefetch_count],
              reason: "clicked_target_but_story_frame_not_detected",
              current_url: current_url,
              excluded_usernames: excluded_usernames,
              story_viewer_active: dom[:story_viewer_active],
              story_frame_present: dom[:story_frame_present],
              media_signature: dom[:media_signature].to_s.byteslice(0, 120),
              debug_info: probe[:debug],
              page_debug: probe[:page_debug]
            }
          )
          # Return to the home page and retry from a clean state.
          begin
            driver.navigate.to(INSTAGRAM_BASE_URL)
            wait_for(driver, css: "body", timeout: 12)
          rescue StandardError
            nil
          end
          next
        end

        capture_task_html(
          driver: driver,
          task_name: "home_story_sync_first_story_opened",
          status: "ok",
          meta: {
            strategy: probe[:target_strategy],
            attempts: attempts,
            target_count: probe[:target_count],
            anchor_count: probe[:anchor_count],
            prefetch_story_usernames: probe[:prefetch_count],
            debug_info: probe[:debug],
            page_debug: probe[:page_debug]
          }
        )
        return true
      end
    end

    # Some IG builds rerender story nodes and invalidate Selenium element handles between probe and click.
    # When we have candidates but no stable handle, click directly in page JS as a fallback.
    if probe[:target_count].to_i.positive?
      js_fallback = click_home_story_open_target_via_js(driver, excluded_usernames: excluded_usernames)
      if js_fallback[:clicked]
        sleep(0.8)
        dom = extract_story_dom_context(driver)
        if story_viewer_ready?(dom)
          capture_task_html(
            driver: driver,
            task_name: "home_story_sync_first_story_opened_js_fallback",
            status: "ok",
            meta: {
              strategy: js_fallback[:strategy],
              attempts: attempts,
              target_count: js_fallback[:count],
              anchor_count: probe[:anchor_count],
              prefetch_story_usernames: probe[:prefetch_count],
              excluded_usernames: excluded_usernames,
              debug_info: probe[:debug],
              page_debug: probe[:page_debug]
            }
          )
          return true
        end
      end
    end

    # If no clickable tray anchors exist, open story route directly from prefetch usernames.
    if !prefetch_route_attempted && attempts >= 3 && Array(probe[:prefetch_usernames]).present?
      prefetch_route_attempted = true
      opened = open_story_from_prefetch_usernames(
        driver: driver,
        usernames: Array(probe[:prefetch_usernames]),
        attempts: attempts,
        probe: probe
      )
      return true if opened
    end

    sleep(1.0)
    # Story tray hydration can stall on initial render; one soft refresh helps recover.
    if attempts == 8 || attempts == 15
      begin
        driver.navigate.refresh
        wait_for(driver, css: "body", timeout: 12)
      rescue StandardError
        nil
      end
    end
  end

  # Deadline expired: capture final diagnostics, then raise with the last
  # probe's counters so the caller's failure artifacts explain why.
  capture_task_html(
    driver: driver,
    task_name: "home_story_sync_no_carousel_found",
    status: "error",
    meta: {
      attempts: attempts,
      elapsed_seconds: (Time.current - started_at).round(2),
      target_count: last_probe[:target_count],
      anchor_count: last_probe[:anchor_count],
      prefetch_story_usernames: last_probe[:prefetch_count],
      target_strategy: last_probe[:target_strategy],
      debug_info: last_probe[:debug],
      page_debug: last_probe[:page_debug],
      current_url: driver.current_url.to_s,
      page_title: begin
        driver.execute_script("return document.title;")
      rescue StandardError
        "unknown"
      end
    }
  )
  raise "No clickable active stories found in the home carousel after waiting #{(Time.current - started_at).round(1)}s (targets=#{last_probe[:target_count].to_i}, anchors=#{last_probe[:anchor_count].to_i}, prefetch=#{last_probe[:prefetch_count].to_i}, strategy=#{last_probe[:target_strategy]})"
end
-
# Advances the currently open story viewer to the next story.
#
# Strategy: +find_story_next_button+ tags the visible "Next" control with the
# data-codex-story-next attribute; we click it with a real mouse action,
# falling back to a JS click and finally an ArrowRight keypress. Afterwards we
# decide whether the viewer actually moved by comparing the story reference in
# the URL and the visible media signature before/after the click.
#
# Returns true when the viewer visibly moved, false otherwise — including on
# any unexpected error (captured for debugging, never raised to the caller).
def click_next_story_in_carousel!(driver:, current_ref:)
  # Snapshot what is on screen before clicking so we can detect movement even
  # when the URL reference does not change.
  previous_signature = visible_story_media_signature(driver)
  marker = find_story_next_button(driver)
  capture_task_html(
    driver: driver,
    task_name: "home_story_sync_next_button_probe",
    status: marker[:found] ? "ok" : "error",
    meta: {
      current_ref: current_ref,
      next_found: marker[:found],
      selector: marker[:selector],
      aria_label: marker[:aria_label],
      outer_html_preview: marker[:outer_html_preview]
    }
  )

  if marker[:found]
    begin
      # Prefer a real mouse interaction on the tagged element.
      el = driver.find_element(css: "[data-codex-story-next='1']")
      driver.action.move_to(el).click.perform
    rescue StandardError
      begin
        # Mouse action failed (e.g. element intercepted) — try a JS click.
        el = driver.find_element(css: "[data-codex-story-next='1']")
        js_click(driver, el)
      rescue StandardError
        # Last resort: the story viewer also advances on ArrowRight.
        driver.action.send_keys(:arrow_right).perform
      end
    ensure
      # Always remove the marker attribute so stale tags never confuse a
      # later probe; failures here are irrelevant.
      begin
        driver.execute_script("const el=document.querySelector('[data-codex-story-next=\"1\"]'); if (el) el.removeAttribute('data-codex-story-next');")
      rescue StandardError
        nil
      end
    end
  else
    # No button found at all — keyboard navigation is the only option.
    driver.action.send_keys(:arrow_right).perform
  end

  # Give the viewer a moment to transition before re-probing.
  sleep(1.0)
  new_ref = current_story_reference(driver.current_url.to_s)
  new_signature = visible_story_media_signature(driver)
  # Moved if either the URL story reference changed, or the visible media
  # signature changed (both sides must be present for the comparison to count).
  moved = (new_ref.present? && new_ref != current_ref) || (new_signature.present? && previous_signature.present? && new_signature != previous_signature)

  capture_task_html(
    driver: driver,
    task_name: "home_story_sync_after_next_click",
    status: moved ? "ok" : "error",
    meta: {
      previous_ref: current_ref,
      new_ref: new_ref,
      previous_signature: previous_signature.to_s.byteslice(0, 120),
      new_signature: new_signature.to_s.byteslice(0, 120),
      moved: moved
    }
  )
  moved
rescue StandardError => e
  capture_task_html(
    driver: driver,
    task_name: "home_story_sync_next_click_error",
    status: "error",
    meta: { previous_ref: current_ref, error_class: e.class.name, error_message: e.message }
  )
  false
end
-
# Locates a visible "Next" control inside the story viewer.
#
# Runs in-page JS that walks a prioritized list of selector candidates (button
# and role=button aria-labels first, bare SVG icons last). For SVG hits the
# closest clickable ancestor is used. The winning element is tagged with
# data-codex-story-next="1" so the caller can find and click it afterwards.
#
# Returns a Hash with symbol keys:
#   found:              whether a visible candidate was tagged
#   selector:           the matching selector label (nil when not found)
#   aria_label:         the element's aria-label (nil when absent)
#   outer_html_preview: first 800 chars of the element's outerHTML
# Never raises; any script/driver failure yields found: false.
def find_story_next_button(driver)
  payload = driver.execute_script(<<~JS)
    const isVisible = (el) => {
      if (!el) return false;
      const s = window.getComputedStyle(el);
      if (!s || s.display === "none" || s.visibility === "hidden" || s.opacity === "0") return false;
      const r = el.getBoundingClientRect();
      return r.width > 6 && r.height > 6;
    };

    const candidates = [
      { sel: "button[aria-label='Next']", label: "button[aria-label='Next']" },
      { sel: "button[aria-label='Next story']", label: "button[aria-label='Next story']" },
      { sel: "[role='button'][aria-label='Next']", label: "[role='button'][aria-label='Next']" },
      { sel: "[role='button'][aria-label*='Next']", label: "[role='button'][aria-label*='Next']" },
      { sel: "svg[aria-label='Next']", label: "svg[aria-label='Next']" },
      { sel: "svg[aria-label*='Next']", label: "svg[aria-label*='Next']" }
    ];

    for (const c of candidates) {
      const nodes = Array.from(document.querySelectorAll(c.sel));
      const hit = nodes.find((n) => {
        const target = (n.tagName && n.tagName.toLowerCase() === "svg") ? (n.closest("button,[role='button']") || n) : n;
        return isVisible(target);
      });
      if (hit) {
        const target = (hit.tagName && hit.tagName.toLowerCase() === "svg") ? (hit.closest("button,[role='button']") || hit) : hit;
        try { target.setAttribute("data-codex-story-next", "1"); } catch (e) {}
        return {
          found: true,
          selector: c.label,
          aria_label: target.getAttribute("aria-label") || "",
          outer_html_preview: (target.outerHTML || "").slice(0, 800)
        };
      }
    }

    return { found: false, selector: "", aria_label: "", outer_html_preview: "" };
  JS

  # execute_script should return the JS object as a Hash; anything else
  # (nil, unexpected driver serialization) is treated as "not found".
  return { found: false, selector: nil, aria_label: nil, outer_html_preview: nil } unless payload.is_a?(Hash)

  # Normalize string keys/values to the symbol-keyed contract; empty strings
  # become nil via presence.
  {
    found: payload["found"] == true,
    selector: payload["selector"].to_s.presence,
    aria_label: payload["aria_label"].to_s.presence,
    outer_html_preview: payload["outer_html_preview"].to_s.presence
  }
rescue StandardError
  { found: false, selector: nil, aria_label: nil, outer_html_preview: nil }
end
-
-
end
-
end
-
end
-
module Instagram
  class Client
    # Mixed into Instagram::Client. Collects the sets of users the account can
    # interact with: DM conversation partners and users currently visible in
    # the home story tray. Each collector prefers the private web API and only
    # falls back to HTML/DOM scraping when the API yields nothing.
    module SyncCollectionSupport
      private

      # Returns a Hash of username => attrs (display_name, profile_pic_url)
      # for users sharing a DM thread with this account.
      #
      # Strategy: try the direct_v2 inbox API first; otherwise load the inbox
      # page, wait for its embedded JSON payloads, and repeatedly scrape +
      # scroll to force more threads to render. `meta` is mutated in place so
      # with_task_capture records how the data was obtained.
      #
      # NOTE: the early `return api_users` exits this method directly (it is a
      # non-local return out of the capture block), so with_task_capture's
      # success capture is skipped on the API path.
      def collect_conversation_users(driver)
        meta = { extraction: "inbox_page_source_verify_contact_row_exists" }

        with_task_capture(driver: driver, task_name: "sync_collect_conversation_users", meta: meta) do
          api_users = fetch_conversation_users_via_api(limit: 120)
          if api_users.present?
            meta[:source] = "api_direct_inbox"
            meta[:unique_usernames] = api_users.length
            return api_users
          end

          meta[:source] = "html_fallback"
          users = {}
          driver.navigate.to("#{INSTAGRAM_BASE_URL}/direct/inbox/")
          wait_for(driver, css: "body", timeout: 10)

          # Inbox content is often rendered via large JSON payloads; wait for those to exist.
          Selenium::WebDriver::Wait.new(timeout: 10).until do
            driver.page_source.to_s.include?("verifyContactRowExists") || driver.page_source.to_s.include?("LSVerifyContactRowExists")
          end

          verify_segments_total = 0
          extracted_total = 0

          # Scrape-then-scroll up to 8 times; first extraction wins per
          # username (||=), so earlier (fresher) rows are kept.
          8.times do
            html = driver.page_source.to_s
            extracted, verify_segments = extract_conversation_users_from_inbox_html(html)
            verify_segments_total += verify_segments
            extracted_total += extracted.length

            extracted.each do |username, attrs|
              users[username] ||= attrs
            end

            # Inbox uses a nested scroller in many builds; try to scroll that first.
            driver.execute_script(<<~JS)
              const candidate =
                document.querySelector("div[role='main']") ||
                document.querySelector("div[role='grid']") ||
                document.scrollingElement ||
                document.documentElement ||
                document.body;
              try { candidate.scrollTop = (candidate.scrollTop || 0) + 750; } catch (e) {}
              try { window.scrollBy(0, 750); } catch (e) {}
            JS
            sleep(0.4)
          end

          meta[:verify_contact_row_segments] = verify_segments_total
          meta[:extracted_usernames_total] = extracted_total
          meta[:unique_usernames] = users.length

          users
        end
      end

      # Returns a Hash of username => { display_name: } for users currently
      # showing in the home story tray.
      #
      # Strategy: reels_tray API first; then the home page's prefetched query
      # payloads; then visible /stories/ anchors in the DOM; finally a regex
      # scan of the raw page source. `meta` records counts for each stage.
      def collect_story_users(driver)
        meta = { extraction: "home_stories_anchors_and_regex" }

        with_task_capture(driver: driver, task_name: "sync_collect_story_users", meta: meta) do
          api_users = fetch_story_users_via_api
          if api_users.present?
            meta[:source] = "api_reels_tray"
            meta[:unique_story_usernames] = api_users.length
            return api_users
          end

          meta[:source] = "html_fallback"
          users = {}
          driver.navigate.to(INSTAGRAM_BASE_URL)
          wait_for(driver, css: "body", timeout: 10)

          dismiss_common_overlays!(driver)

          html = driver.page_source.to_s
          extracted_users = extract_story_users_from_home_html(html)
          meta[:story_prefetch_usernames] = extracted_users.length

          extracted_users.each do |username|
            users[username] ||= { display_name: username }
          end

          # If we didn't get anything from prefetched query payloads, try DOM anchors as a fallback.
          if users.empty?
            begin
              Selenium::WebDriver::Wait.new(timeout: 12).until do
                driver.find_elements(css: "a[href*='/stories/']").any?
              end
            rescue Selenium::WebDriver::Error::TimeoutError
              # Not fatal — the regex fallback below may still find links.
              meta[:story_anchor_wait_timed_out] = true
            end

            story_hrefs = driver.find_elements(css: "a[href*='/stories/']").map { |a| a.attribute("href").to_s }.reject(&:blank?)
            meta[:story_anchor_hrefs] = story_hrefs.length

            story_hrefs.each do |href|
              # Story links look like .../stories/<username>/...; pull the
              # first path segment after /stories/.
              username = href.split("/stories/").last.to_s.split("/").first.to_s
              username = normalize_username(username)
              next if username.blank?

              users[username] ||= { display_name: username }
            end

            # Fallback: parse the page source for story links even if anchors use different tag/attrs.
            html = driver.page_source.to_s
            story_usernames = html.scan(%r{/stories/([A-Za-z0-9._]{1,30})/}).flatten.map { |u| normalize_username(u) }.reject(&:blank?).uniq
            meta[:story_regex_usernames] = story_usernames.length

            story_usernames.each do |username|
              users[username] ||= { display_name: username }
            end
          else
            meta[:story_anchor_hrefs] = 0
            meta[:story_regex_usernames] = 0
          end

          meta[:unique_story_usernames] = users.length

          users
        end
      end

      # Pages through the direct_v2 inbox API collecting thread participants.
      #
      # limit is clamped to 1..400 and fetched in pages of up to 50; a safety
      # counter caps pagination at 12 pages regardless. Returns a Hash of
      # username => { display_name:, profile_pic_url: }. Best-effort: any
      # error yields {} so callers fall back to HTML scraping.
      def fetch_conversation_users_via_api(limit: 120)
        users = {}
        cursor = nil
        remaining = limit.to_i.clamp(1, 400)
        safety = 0

        loop do
          safety += 1
          break if safety > 12
          break if remaining <= 0

          count = [ remaining, 50 ].min
          q = [ "limit=#{count}", "visual_message_return_type=unseen" ]
          q << "cursor=#{CGI.escape(cursor)}" if cursor.present?
          path = "/api/v1/direct_v2/inbox/?#{q.join('&')}"
          body = ig_api_get_json(path: path, referer: "#{INSTAGRAM_BASE_URL}/direct/inbox/")
          break unless body.is_a?(Hash)

          inbox = body["inbox"].is_a?(Hash) ? body["inbox"] : {}
          threads = Array(inbox["threads"])
          break if threads.empty?

          threads.each do |thread|
            next unless thread.is_a?(Hash)
            Array(thread["thread_users"]).each do |u|
              next unless u.is_a?(Hash)
              username = normalize_username(u["username"])
              next if username.blank?

              users[username] ||= {
                display_name: u["full_name"].to_s.strip.presence || username,
                # profile_pic_url arrives HTML-escaped in this payload.
                profile_pic_url: CGI.unescapeHTML(u["profile_pic_url"].to_s).strip.presence
              }
            end
          end

          remaining -= threads.length
          cursor = inbox["oldest_cursor"].to_s.strip.presence
          break if cursor.blank?
        end

        users
      rescue StandardError
        {}
      end

      # Fetches the story tray via the reels_tray API.
      #
      # Handles both payload shapes: "tray" as an Array of items, or as a Hash
      # with an "items" array. Returns username => { display_name:,
      # profile_pic_url: }; {} on any error (best-effort).
      def fetch_story_users_via_api
        body = ig_api_get_json(path: "/api/v1/feed/reels_tray/", referer: INSTAGRAM_BASE_URL)
        return {} unless body.is_a?(Hash)

        tray_items =
          if body["tray"].is_a?(Array)
            body["tray"]
          elsif body["tray"].is_a?(Hash)
            Array(body.dig("tray", "items"))
          else
            []
          end

        users = {}
        tray_items.each do |item|
          next unless item.is_a?(Hash)
          # Some tray entries nest the user under "user"; others are the user.
          user = item["user"].is_a?(Hash) ? item["user"] : item
          username = normalize_username(user["username"])
          next if username.blank?

          users[username] ||= {
            display_name: user["full_name"].to_s.strip.presence || username,
            profile_pic_url: CGI.unescapeHTML(user["profile_pic_url"].to_s).strip.presence
          }
        end

        users
      rescue StandardError
        {}
      end

      # Classifies where a username was discovered:
      # "conversation+story", "story", or "conversation" (the default).
      def source_for(username, conversation_users, story_users)
        in_conversation = conversation_users.key?(username)
        in_story = story_users.key?(username)

        return "conversation+story" if in_conversation && in_story
        return "story" if in_story

        "conversation"
      end
    end
  end
end
-
module Instagram
  class Client
    # Runs a full recipient sync for one account: gathers DM partners and
    # story-tray users, upserts a Recipient and ConversationPeer row per
    # username, and stamps the account's last_synced_at.
    #
    # Every collaborator is injected as a callable so this service stays
    # decoupled from the client's session/driver management.
    class SyncDataService
      def initialize(
        account:,
        with_recoverable_session:,
        with_authenticated_driver:,
        collect_conversation_users:,
        collect_story_users:,
        fetch_eligibility:,
        source_for:
      )
        @account = account
        @with_recoverable_session = with_recoverable_session
        @with_authenticated_driver = with_authenticated_driver
        @collect_conversation_users = collect_conversation_users
        @collect_story_users = collect_story_users
        @fetch_eligibility = fetch_eligibility
        @source_for = source_for
      end

      # Executes the sync inside a recoverable session and an authenticated
      # driver. Returns { recipients:, eligible: } counts for the account.
      def call
        with_recoverable_session.call(label: "sync") do
          with_authenticated_driver.call do |driver|
            conversation_users = collect_conversation_users.call(driver)
            story_users = collect_story_users.call(driver)

            (conversation_users.keys + story_users.keys).uniq.each do |username|
              eligibility = eligibility_for(driver: driver, username: username, conversation_users: conversation_users)
              recipient = upsert_recipient!(
                username: username,
                conversation_users: conversation_users,
                story_users: story_users,
                eligibility: eligibility
              )
              touch_conversation_peer!(username: username, display_name: recipient.display_name)
            end

            account.update!(last_synced_at: Time.current)

            {
              recipients: account.recipients.count,
              eligible: account.recipients.eligible.count
            }
          end
        end
      end

      private

      attr_reader :account,
                  :with_recoverable_session,
                  :with_authenticated_driver,
                  :collect_conversation_users,
                  :collect_story_users,
                  :fetch_eligibility,
                  :source_for

      # A username with an existing DM thread is messageable by definition;
      # anyone else gets probed via the injected eligibility callable.
      def eligibility_for(driver:, username:, conversation_users:)
        return { can_message: true, restriction_reason: nil } if conversation_users.key?(username)

        fetch_eligibility.call(driver, username)
      end

      # Creates or updates the Recipient row for one username and returns it.
      def upsert_recipient!(username:, conversation_users:, story_users:, eligibility:)
        recipient = account.recipients.find_or_initialize_by(username: username)
        recipient.display_name = conversation_users.dig(username, :display_name) || story_users.dig(username, :display_name) || username
        recipient.source = source_for.call(username, conversation_users, story_users)
        recipient.story_visible = story_users.key?(username)
        recipient.can_message = eligibility[:can_message]
        recipient.restriction_reason = eligibility[:restriction_reason]
        recipient.save!
        recipient
      end

      # Creates or refreshes the ConversationPeer row for one username.
      def touch_conversation_peer!(username:, display_name:)
        peer = account.conversation_peers.find_or_initialize_by(username: username)
        peer.display_name = display_name
        peer.last_message_at = Time.current
        peer.save!
      end
    end
  end
end
-
module Instagram
  class Client
    # Synchronizes the account's follower/following graph plus DM and story
    # visibility signals into InstagramProfile rows. Collaborators are
    # injected as callables so the service stays decoupled from the client's
    # session/driver management.
    class SyncFollowGraphService
      def initialize(
        account:,
        with_recoverable_session:,
        with_authenticated_driver:,
        collect_conversation_users:,
        collect_story_users:,
        collect_follow_list:,
        upsert_follow_list:
      )
        @account = account
        @with_recoverable_session = with_recoverable_session
        @with_authenticated_driver = with_authenticated_driver
        @collect_conversation_users = collect_conversation_users
        @collect_story_users = collect_story_users
        @collect_follow_list = collect_follow_list
        @upsert_follow_list = upsert_follow_list
      end

      # Runs the full graph sync and returns a Hash of summary counts
      # (followers, following, mutuals, conversation_threads, profiles_total,
      # story_tray_visible). Raises if the account has no username set.
      def call
        with_recoverable_session.call(label: "sync_follow_graph") do
          with_authenticated_driver.call do |driver|
            raise "Instagram username must be set on the account before syncing" if account.username.blank?

            conversation_users = collect_conversation_users.call(driver)
            story_users = collect_story_users.call(driver)

            followers = collect_follow_list.call(driver, list_kind: :followers, profile_username: account.username)
            following = collect_follow_list.call(driver, list_kind: :following, profile_username: account.username)

            follower_usernames = followers.keys
            following_usernames = following.keys
            mutuals = follower_usernames & following_usernames

            # Reset-and-reapply inside one transaction so the graph flags are
            # never observed half-updated. update_all bypasses AR callbacks.
            InstagramProfile.transaction do
              account.instagram_profiles.update_all(following: false, follows_you: false)

              upsert_follow_list.call(followers, following_flag: false, follows_you_flag: true)
              upsert_follow_list.call(following, following_flag: true, follows_you_flag: false)

              account.instagram_profiles.where(username: mutuals).update_all(last_synced_at: Time.current)

              # Anyone with a visible inbox thread is known-messageable.
              messageable_usernames = conversation_users.keys
              account.instagram_profiles.where(username: messageable_usernames).update_all(
                can_message: true,
                restriction_reason: nil,
                dm_interaction_state: "messageable",
                dm_interaction_reason: "inbox_thread_seen",
                dm_interaction_checked_at: Time.current,
                dm_interaction_retry_after_at: nil
              )
            end

            mark_story_visibility!(story_users: story_users)
            account.update!(last_synced_at: Time.current)

            {
              followers: follower_usernames.length,
              following: following_usernames.length,
              mutuals: mutuals.length,
              conversation_threads: conversation_users.length,
              profiles_total: account.instagram_profiles.count,
              story_tray_visible: story_users.length
            }
          end
        end
      end

      private

      attr_reader :account,
                  :with_recoverable_session,
                  :with_authenticated_driver,
                  :collect_conversation_users,
                  :collect_story_users,
                  :collect_follow_list,
                  :upsert_follow_list

      # Stamps last_story_seen_at on every profile currently in the story
      # tray and records a "story_seen" event. The external_id embeds the UTC
      # date, which de-duplicates the event to at most one per day per
      # profile (assuming record_event! dedupes on external_id — confirm).
      def mark_story_visibility!(story_users:)
        now = Time.current

        story_users.each_key do |username|
          profile = account.instagram_profiles.find_by(username: username)
          next unless profile

          profile.last_story_seen_at = now
          profile.recompute_last_active!
          profile.save!

          profile.record_event!(
            kind: "story_seen",
            external_id: "story_seen:#{now.utc.to_date.iso8601}",
            occurred_at: nil,
            metadata: { source: "home_story_tray" }
          )
        end
      end
    end
  end
end
-
module Instagram
  class Client
    # Mixed into Instagram::Client. Debug instrumentation: wraps automation
    # tasks so that HTML snapshots, screenshots, console/performance logs and
    # metadata are written to disk for every step (success or failure).
    module TaskCaptureSupport
      private

      # Runs the given block, then writes an "ok" capture; on StandardError it
      # writes an "error" capture (with class/message/truncated backtrace in
      # meta) and re-raises. Returns the block's result.
      #
      # NOTE: a non-local `return` inside the block unwinds past this method,
      # so such early exits skip the success capture.
      def with_task_capture(driver:, task_name:, meta: {})
        result = yield
        capture_task_html(driver: driver, task_name: task_name, status: "ok", meta: meta)
        result
      rescue StandardError => e
        capture_task_html(
          driver: driver,
          task_name: task_name,
          status: "error",
          meta: meta.merge(
            error_class: e.class.name,
            error_message: e.message,
            error_backtrace: Array(e.backtrace).take(40)
          )
        )
        raise
      end

      # Writes debug artifacts for one task step under
      # DEBUG_CAPTURE_DIR/<YYYYMMDD>/: a .html page snapshot, a .json metadata
      # file, and (best-effort) a .png screenshot. Every driver interaction is
      # individually guarded; this method never raises — failures are logged
      # via Rails.logger.warn.
      def capture_task_html(driver:, task_name:, status:, meta: {})
        timestamp = Time.current.utc.strftime("%Y%m%dT%H%M%S.%LZ")
        # Slugify the task name to a safe filename component.
        slug = task_name.to_s.downcase.gsub(/[^a-z0-9]+/, "_").gsub(/\A_|_\z/, "")
        root = DEBUG_CAPTURE_DIR.join(Time.current.utc.strftime("%Y%m%d"))
        FileUtils.mkdir_p(root)

        base = "#{timestamp}_#{slug}_#{status}"
        html_path = root.join("#{base}.html")
        json_path = root.join("#{base}.json")
        png_path = root.join("#{base}.png")

        html = begin
          driver.page_source.to_s
        rescue StandardError => e
          "<!-- unable to capture page_source: #{e.class}: #{e.message} -->"
        end

        # Caller-provided meta wins over the base fields on key collisions.
        metadata = {
          timestamp: Time.current.utc.iso8601(3),
          task_name: task_name,
          status: status,
          account_username: @account.username,
          current_url: safe_driver_value(driver) { driver.current_url },
          page_title: safe_driver_value(driver) { driver.title }
        }.merge(meta)

        # Best-effort capture of browser console logs. Not all driver builds support this.
        logs =
          safe_driver_value(driver) do
            next nil unless driver.respond_to?(:logs)
            types = driver.logs.available_types
            next nil unless types.include?(:browser) || types.include?("browser")

            driver.logs.get(:browser).map do |entry|
              {
                timestamp: entry.timestamp,
                level: entry.level,
                message: entry.message.to_s.byteslice(0, 2000)
              }
            end.last(200)
          end
        metadata[:browser_console] = logs if logs.present?

        # Same best-effort pattern for Chrome's "performance" log channel.
        perf =
          safe_driver_value(driver) do
            next nil unless driver.respond_to?(:logs)
            types = driver.logs.available_types
            next nil unless types.include?(:performance) || types.include?("performance")

            driver.logs.get(:performance).map do |entry|
              { timestamp: entry.timestamp, message: entry.message.to_s.byteslice(0, 20_000) }
            end.last(300)
          end
        if perf.present?
          metadata[:performance_summary] = summarize_performance_logs(perf)
          metadata[:performance_logs] = filter_performance_logs(perf)
        end

        # Screenshot helps catch transient toasts/overlays that aren't obvious from HTML.
        safe_driver_value(driver) do
          driver.save_screenshot(png_path.to_s)
          true
        end
        metadata[:screenshot] = png_path.to_s if File.exist?(png_path)

        File.write(html_path, html)
        File.write(json_path, JSON.pretty_generate(metadata))
      rescue StandardError => e
        Rails.logger.warn("Failed to write debug capture for #{task_name}: #{e.class}: #{e.message}")
      end

      # Condenses raw CDP performance entries to the request/response pairs
      # whose URLs pass interesting_perf_url?. Returns
      # { interesting_request_count:, recent_interesting: } (last 40 requests,
      # each merged with its matching response when one was seen). On any
      # error returns { error: "<class>: <message>" } instead of raising.
      def summarize_performance_logs(entries)
        # Chrome "performance" log entries are JSON strings.
        # We keep a small summary so the JSON artifacts stay readable.
        requests = []
        responses = {}

        Array(entries).each do |e|
          raw = e.is_a?(Hash) ? e[:message] || e["message"] : nil
          next if raw.blank?

          # Malformed entries are simply skipped.
          msg = JSON.parse(raw) rescue nil
          inner = msg.is_a?(Hash) ? msg["message"] : nil
          next unless inner.is_a?(Hash)

          method = inner["method"].to_s
          params = inner["params"].is_a?(Hash) ? inner["params"] : {}

          case method
          when "Network.requestWillBeSent"
            req = params["request"].is_a?(Hash) ? params["request"] : {}
            url = req["url"].to_s
            next if url.blank?
            next unless interesting_perf_url?(url)
            requests << { request_id: params["requestId"], url: url, http_method: req["method"] }
          when "Network.responseReceived"
            resp = params["response"].is_a?(Hash) ? params["response"] : {}
            url = resp["url"].to_s
            next if url.blank?
            next unless interesting_perf_url?(url)
            # Keyed by requestId so responses can be joined to requests below.
            responses[params["requestId"].to_s] = { url: url, status: resp["status"], mime_type: resp["mimeType"] }
          end
        end

        recent = requests.last(40).map do |r|
          rid = r[:request_id].to_s
          r.merge(response: responses[rid])
        end

        {
          interesting_request_count: requests.size,
          recent_interesting: recent
        }
      rescue StandardError => e
        { error: "#{e.class}: #{e.message}" }
      end

      # Returns at most the last 200 raw entries whose message mentions a
      # network request/response/failure CDP event.
      def filter_performance_logs(entries)
        # Keep only likely-relevant messages to avoid huge JSON artifacts.
        Array(entries).select do |e|
          raw = e.is_a?(Hash) ? e[:message] || e["message"] : nil
          next false if raw.blank?
          raw.include?("Network.requestWillBeSent") ||
            raw.include?("Network.responseReceived") ||
            raw.include?("Network.loadingFailed")
        end.last(200)
      end

      # True when the URL belongs to Instagram API / GraphQL / direct-message
      # traffic worth keeping in the performance summary.
      def interesting_perf_url?(url)
        u = url.to_s
        u.include?("/api/v1/") ||
          u.include?("/graphql") ||
          u.include?("/direct") ||
          u.include?("direct_v2") ||
          u.include?("broadcast")
      end

      # Yields the block and swallows any StandardError, returning nil.
      # The driver argument is unused here; it documents intent at call sites.
      def safe_driver_value(driver)
        yield
      rescue StandardError
        nil
      end
    end
  end
end
-
require "net/http"
-
require "digest"
-
require "set"
-
-
module Instagram
-
class ProfileAnalysisCollector
-
MAX_POST_IMAGE_BYTES = 6 * 1024 * 1024
-
MAX_POST_VIDEO_BYTES = 80 * 1024 * 1024
-
-
# Binds the collector to one account/profile pair and builds a fresh
# Instagram client for fetching that profile's analysis dataset.
def initialize(account:, profile:)
  @account, @profile = account, profile
  @client = Instagram::Client.new(account: @account)
end
-
-
# Fetches the profile's analysis dataset via the Instagram client and
# persists it: refreshes profile columns, upserts every returned post (and
# optionally its media + comments), and optionally marks posts missing from
# the fetch as deleted.
#
# posts_limit / comments_limit are forwarded to the dataset fetch;
# track_missing_as_deleted and download_media accept boolean-ish values
# (cast via ActiveModel::Type::Boolean).
#
# Returns { details:, posts:, summary: } where summary carries per-change
# counts and de-duplicated shortcode lists (created/updated/restored/
# deleted/analysis candidates) plus the raw feed_fetch info.
def collect_and_persist!(
  posts_limit: nil,
  comments_limit: 8,
  track_missing_as_deleted: false,
  sync_source: "instagram_profile_analysis_dataset",
  download_media: true
)
  dataset = @client.fetch_profile_analysis_dataset!(
    username: @profile.username,
    posts_limit: posts_limit,
    comments_limit: comments_limit
  )

  synced_at = Time.current
  details = dataset[:profile] || {}
  update_profile_from_details!(details)

  # Cast the boolean-ish flags once up front: the original code built a new
  # ActiveModel::Type::Boolean caster for download_media on every post in
  # the loop below even though the value is loop-invariant.
  download_media_enabled = ActiveModel::Type::Boolean.new.cast(download_media)
  track_deletions = ActiveModel::Type::Boolean.new.cast(track_missing_as_deleted)

  fetched_shortcodes = Set.new
  summary = {
    created_count: 0,
    updated_count: 0,
    unchanged_count: 0,
    restored_count: 0,
    deleted_count: 0,
    created_shortcodes: [],
    updated_shortcodes: [],
    restored_shortcodes: [],
    deleted_shortcodes: [],
    analysis_candidate_shortcodes: [],
    feed_fetch: dataset[:feed_fetch].is_a?(Hash) ? dataset[:feed_fetch] : {}
  }

  persisted_posts = Array(dataset[:posts]).map do |post_data|
    result = persist_profile_post!(
      post_data,
      synced_at: synced_at,
      sync_source: sync_source,
      download_media: download_media_enabled
    )
    # persist_profile_post! returns nil for unusable entries (e.g. blank
    # shortcode); skip them entirely.
    next nil unless result

    post = result[:post]
    fetched_shortcodes << post.shortcode.to_s

    # Tally the change classification reported by persist_profile_post!.
    case result[:change]
    when :created
      summary[:created_count] += 1
      summary[:created_shortcodes] << post.shortcode.to_s
    when :restored
      summary[:restored_count] += 1
      summary[:restored_shortcodes] << post.shortcode.to_s
    when :updated
      summary[:updated_count] += 1
      summary[:updated_shortcodes] << post.shortcode.to_s
    else
      summary[:unchanged_count] += 1
    end
    if result[:analysis_required]
      summary[:analysis_candidate_shortcodes] << post.shortcode.to_s
    end

    post
  end.compact

  # Only sweep for deletions when the fetch actually returned something —
  # an empty fetch would otherwise mark every known post as deleted.
  if track_deletions && fetched_shortcodes.any?
    deleted = mark_missing_posts_as_deleted!(
      fetched_shortcodes: fetched_shortcodes,
      synced_at: synced_at,
      sync_source: sync_source
    )
    summary[:deleted_count] = deleted[:count]
    summary[:deleted_shortcodes] = deleted[:shortcodes]
  end

  {
    details: details,
    posts: persisted_posts,
    summary: summary.merge(
      created_shortcodes: Array(summary[:created_shortcodes]).uniq,
      updated_shortcodes: Array(summary[:updated_shortcodes]).uniq,
      restored_shortcodes: Array(summary[:restored_shortcodes]).uniq,
      deleted_shortcodes: Array(summary[:deleted_shortcodes]).uniq,
      analysis_candidate_shortcodes: Array(summary[:analysis_candidate_shortcodes]).uniq
    )
  }
end
-
-
private
-
-
# Refreshes the profile's columns from scraped details, keeping the
# currently stored value for anything the scrape did not provide.
# followers_count goes through normalize_count so only clean digit strings
# ever overwrite the stored number.
def update_profile_from_details!(details)
  fallbacks = {
    display_name: @profile.display_name,
    profile_pic_url: @profile.profile_pic_url,
    ig_user_id: @profile.ig_user_id,
    bio: @profile.bio,
    last_post_at: @profile.last_post_at
  }
  updates = fallbacks.each_with_object({}) do |(column, current), acc|
    acc[column] = details[column].presence || current
  end
  updates[:followers_count] = normalize_count(details[:followers_count]) || @profile.followers_count

  @profile.update!(updates)
  @profile.recompute_last_active!
  @profile.save!
end
-
-
# Coerces a scraped count into an Integer.
# Accepts only values whose stripped string form is all digits; anything
# else (signed numbers, "12k", blanks, errors during coercion) yields nil.
def normalize_count(value)
  digits = value.to_s.strip
  digits.match?(/\A\d+\z/) ? digits.to_i : nil
rescue StandardError
  nil
end
-
-
# Upserts a single profile post from fetched post_data and classifies what
# happened to it.
#
# Returns { post:, change:, analysis_required: } where change is one of
# :created / :restored / :updated / :unchanged, or nil when post_data has no
# usable shortcode. analysis_required flags posts whose analysis-relevant
# content changed (or that were never successfully analyzed); those get
# their ai_status reset to "pending".
def persist_profile_post!(post_data, synced_at:, sync_source:, download_media:)
  shortcode = post_data[:shortcode].to_s.strip
  return nil if shortcode.blank?

  post = @profile.instagram_profile_posts.find_or_initialize_by(shortcode: shortcode)
  # Snapshot signatures/state BEFORE writing so we can classify the change
  # after the save.
  previous_signature = post_signature(post)
  previous_analysis_signature = post_analysis_signature(post)
  was_new = post.new_record?
  was_deleted = post_deleted?(post)
  existing_metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
  # Merge fresh fetch data over existing metadata; string keys throughout.
  merged_metadata = existing_metadata.merge(
    "media_type" => post_data[:media_type],
    "media_id" => post_data[:media_id],
    "post_kind" => post_data[:post_kind],
    "product_type" => post_data[:product_type],
    "media_url" => post_data[:media_url].to_s.presence,
    "media_url_image" => post_data[:image_url].to_s.presence,
    "media_url_video" => post_data[:video_url].to_s.presence,
    "image_url" => post_data[:image_url].to_s.presence,
    "video_url" => post_data[:video_url].to_s.presence,
    "is_repost" => ActiveModel::Type::Boolean.new.cast(post_data[:is_repost]),
    "comments_count_api" => post_data[:comments_count],
    "source" => sync_source.to_s
  )
  # Seeing the post again means it is no longer deleted: clear the deletion
  # markers and stamp restored_at when it had been marked deleted.
  merged_metadata.delete("deleted_from_source")
  merged_metadata.delete("deleted_detected_at")
  merged_metadata.delete("deleted_reason")
  merged_metadata["restored_at"] = synced_at.utc.iso8601(3) if was_deleted

  post.instagram_account = @account
  post.taken_at = post_data[:taken_at]
  post.caption = post_data[:caption]
  post.permalink = post_data[:permalink]
  post.source_media_url = post_data[:media_url].presence || post_data[:image_url]
  post.likes_count = post_data[:likes_count].to_i
  # Comments count: trust whichever is larger — the number of comments we
  # actually extracted, or the count the API reported.
  extracted_comments_count = Array(post_data[:comments]).size
  api_comments_count = post_data[:comments_count].to_i
  post.comments_count = [ extracted_comments_count, api_comments_count ].max
  post.last_synced_at = synced_at
  post.metadata = merged_metadata
  post.save!

  if download_media
    sync_media!(
      post: post,
      media_url: post_data[:media_url].presence || post_data[:image_url],
      media_id: post_data[:media_id]
    )
  end
  sync_comments!(
    post: post,
    comments: post_data[:comments],
    expected_comments_count: post_data[:comments_count]
  )

  # Reload so the signature reflects everything written above (including by
  # sync_media!/sync_comments!).
  current_signature = post_signature(post.reload)
  current_analysis_signature = post_analysis_signature(post)
  changed = (previous_signature != current_signature)
  change =
    if was_new
      :created
    elsif was_deleted
      :restored
    elsif changed
      :updated
    else
      :unchanged
    end

  # Re-analysis is needed for new/restored posts, analysis-relevant content
  # changes, or posts never marked as successfully analyzed.
  analysis_required =
    was_new ||
    was_deleted ||
    (previous_analysis_signature != current_analysis_signature) ||
    post.ai_status.to_s != "analyzed" ||
    post.analyzed_at.blank?
  # Reset to pending via update_columns (skips validations/callbacks) unless
  # it is already in the pristine pending state.
  if analysis_required && (post.ai_status.to_s != "pending" || post.analyzed_at.present?)
    post.update_columns(ai_status: "pending", analyzed_at: nil, updated_at: Time.current)
  end

  { post: post, change: change, analysis_required: analysis_required }
end
-
-
# Ensures the post has its media attached, downloading only when necessary.
#
# Skips (returns false) when: the URL is blank; media is already attached
# for the same media_id; or media is attached and the URL fingerprint is
# unchanged. Otherwise tries to reuse a blob from the local cache before
# downloading and uploading a fresh blob. Returns true when media was
# (re)attached; any failure is logged and yields false — never raises.
def sync_media!(post:, media_url:, media_id: nil)
  url = media_url.to_s.strip
  return false if url.blank?

  incoming_media_id = media_id.to_s.strip
  existing_metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
  existing_media_id = existing_metadata["media_id"].to_s.strip
  # Same media_id already attached → nothing to do (CDN URLs rotate, so the
  # media_id comparison catches re-fetches of identical media).
  if post.media.attached? && incoming_media_id.present? && existing_media_id.present? && incoming_media_id == existing_media_id
    return false
  end

  # Fingerprint of the exact URL — the cheapest "unchanged" check.
  fp = Digest::SHA256.hexdigest(url)
  return false if post.media.attached? && post.media_url_fingerprint.to_s == fp

  # Reuse an already-downloaded blob from another post row when possible.
  if attach_media_from_local_cache!(post: post, incoming_media_id: incoming_media_id, fingerprint: fp)
    return true
  end

  io, content_type, filename = download_media(url)
  # identify: false — we already validated/typed the payload ourselves.
  blob = ActiveStorage::Blob.create_and_upload!(
    io: io,
    filename: filename,
    content_type: content_type,
    identify: false
  )
  attach_blob_to_post!(post: post, blob: blob)
  post.update!(media_url_fingerprint: fp)
  true
rescue StandardError => e
  Rails.logger.warn("[ProfileAnalysisCollector] media sync failed for shortcode=#{post.shortcode}: #{e.class}: #{e.message}")
  false
ensure
  # Close the download IO if it was ever assigned (defined? guards the case
  # where download_media raised before assignment).
  io&.close if defined?(io) && io.respond_to?(:close)
end
-
-
# Tries to satisfy the post's media from an already-stored blob instead of
# re-downloading. Returns true when the post's attachment or fingerprint was
# updated, false when nothing usable was found or nothing needed changing.
# Any failure is logged and yields false — never raises.
def attach_media_from_local_cache!(post:, incoming_media_id:, fingerprint:)
  candidate = cached_profile_post_blob(post: post, incoming_media_id: incoming_media_id)
  return false if candidate.nil? || !blob_integrity_for(candidate)[:valid]

  if post.media.attached? && post.media.blob_id == candidate.id
    # Already attached to this exact blob — at most refresh the fingerprint.
    return false if post.media_url_fingerprint.to_s == fingerprint

    post.update!(media_url_fingerprint: fingerprint)
    return true
  end

  attach_blob_to_post!(post: post, blob: candidate)
  post.update!(media_url_fingerprint: fingerprint)
  true
rescue StandardError => e
  Rails.logger.warn("[ProfileAnalysisCollector] local media cache attach failed for shortcode=#{post.shortcode}: #{e.class}: #{e.message}")
  false
end
-
-
# Finds a reusable cached blob for this post, in priority order:
# another profile post with the same media_id, then one with the same
# shortcode, then a feed post with the same shortcode. Returns nil when no
# valid cached blob exists.
def cached_profile_post_blob(post:, incoming_media_id:)
  cached_blob_from_profile_posts(post: post, incoming_media_id: incoming_media_id) ||
    cached_blob_from_profile_posts(post: post, incoming_media_id: nil) ||
    cached_blob_from_feed_posts(shortcode: post.shortcode)
end
-
-
# Searches other InstagramProfilePost rows (never the post itself) for an
# attached media blob to reuse.
#
# With an incoming_media_id, matches on the JSONB metadata's media_id
# (Postgres ->> operator); otherwise falls back to matching the post's
# shortcode. Candidates are scanned newest-first and the first blob that
# passes the integrity check is returned; nil when none qualifies.
def cached_blob_from_profile_posts(post:, incoming_media_id:)
  scope = InstagramProfilePost.joins(:media_attachment).where.not(id: post.id)
  if incoming_media_id.to_s.present?
    scope = scope.where("metadata ->> 'media_id' = ?", incoming_media_id.to_s)
  else
    shortcode = post.shortcode.to_s.strip
    return nil if shortcode.blank?

    scope = scope.where(shortcode: shortcode)
  end

  scope.order(updated_at: :desc, id: :desc).each do |candidate|
    next unless candidate&.media&.attached?

    blob = candidate.media.blob
    # Only reuse blobs whose backing file actually checks out on disk.
    return blob if blob_integrity_for(blob)[:valid]
  end

  nil
end
-
-
# Searches feed posts (InstagramPost) for an attached, integrity-valid media
# blob matching the given shortcode. Most recently downloaded candidates are
# checked first; returns nil when none qualifies or shortcode is blank.
def cached_blob_from_feed_posts(shortcode:)
  normalized = shortcode.to_s.strip
  return nil if normalized.blank?

  candidates = InstagramPost
    .joins(:media_attachment)
    .where(shortcode: normalized)
    .order(media_downloaded_at: :desc, id: :desc)

  candidates.each do |feed_post|
    next unless feed_post&.media&.attached?

    candidate_blob = feed_post.media.blob
    return candidate_blob if blob_integrity_for(candidate_blob)[:valid]
  end

  nil
end
-
-
# Replaces the post's stored comments with the (at most 20) freshly fetched
# ones, but only when they actually differ.
#
# Comparison is done on normalized [author, body, commented_at-epoch]
# triples so cosmetic whitespace differences don't trigger a rewrite. When
# the fetch returned no comments, existing ones are preserved unless the
# source's expected count says there truly are none.
def sync_comments!(post:, comments:, expected_comments_count:)
  # Cap at the first 20 comments; anything beyond is intentionally dropped.
  entries = Array(comments).first(20)
  normalized_entries = entries.filter_map do |c|
    body = c[:text].to_s.strip
    next if body.blank?
    [c[:author_username].to_s.strip.presence, body, c[:created_at]&.to_i]
  end
  existing_entries = post.instagram_profile_post_comments.order(:id).map do |comment|
    [comment.author_username.to_s.strip.presence, comment.body.to_s.strip, comment.commented_at&.to_i]
  end

  # Identical non-empty sets → nothing to do.
  return if normalized_entries == existing_entries && normalized_entries.any?

  if entries.empty?
    # Keep previously captured comments when this sync could not fetch them.
    # Only clear if the source explicitly reports no comments.
    if expected_comments_count.to_i <= 0
      post.instagram_profile_post_comments.delete_all
    end
    return
  end

  # Full replace: wipe then recreate (delete_all skips callbacks).
  post.instagram_profile_post_comments.delete_all

  entries.each do |c|
    body = c[:text].to_s.strip
    next if body.blank?

    post.instagram_profile_post_comments.create!(
      instagram_profile: @profile,
      author_username: c[:author_username].to_s.strip.presence,
      body: body,
      commented_at: c[:created_at],
      metadata: { source: "instagram_feed_preview" }
    )
  end
end
-
-
# Downloads a media payload over HTTP(S) and validates it.
#
# Follows up to redirects_left redirects (recursively), enforces per-type
# size limits (MAX_POST_VIDEO_BYTES for video/*, MAX_POST_IMAGE_BYTES
# otherwise), rejects HTML payloads (typical of login walls / error pages)
# and payloads whose magic bytes don't match the declared content type.
#
# Returns [io, content_type, filename] where io is a binary StringIO and the
# filename embeds a 12-char digest of the URL. Raises RuntimeError on any
# validation or HTTP failure.
def download_media(url, redirects_left: 4)
  uri = URI.parse(url)
  # URI::HTTPS subclasses URI::HTTP, but both are checked for clarity.
  raise "invalid media URL" unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 10
  http.read_timeout = 30

  req = Net::HTTP::Get.new(uri.request_uri)
  req["Accept"] = "*/*"
  # Present the account's own UA so the CDN sees a consistent client.
  req["User-Agent"] = @account.user_agent.presence || "Mozilla/5.0"
  req["Referer"] = Instagram::Client::INSTAGRAM_BASE_URL
  res = http.request(req)

  if res.is_a?(Net::HTTPRedirection) && res["location"].present?
    raise "too many redirects" if redirects_left.to_i <= 0

    # Resolve relative Location headers against the current URI.
    redirected_url = normalize_redirect_url(base_uri: uri, location: res["location"])
    raise "invalid redirect URL" if redirected_url.blank?

    return download_media(redirected_url, redirects_left: redirects_left.to_i - 1)
  end

  raise "media download failed: HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess)

  body = res.body.to_s
  # Strip any "; charset=..." suffix from the content type.
  content_type = res["content-type"].to_s.split(";").first.presence || "application/octet-stream"
  size_limit = content_type.start_with?("video/") ? MAX_POST_VIDEO_BYTES : MAX_POST_IMAGE_BYTES
  raise "empty media payload" if body.bytesize <= 0
  raise "media too large" if body.bytesize > size_limit
  raise "unexpected html payload" if html_payload?(body)
  validate_known_signature!(body: body, content_type: content_type)

  ext = extension_for_content_type(content_type: content_type)
  io = StringIO.new(body)
  io.set_encoding(Encoding::BINARY) if io.respond_to?(:set_encoding)
  [io, content_type, "profile_post_#{Digest::SHA256.hexdigest(url)[0, 12]}.#{ext}"]
end
-
-
# Resolves a Location header against the request URI and returns the absolute
# URL as a String, or nil when the result is not a valid HTTP(S) URL.
def normalize_redirect_url(base_uri:, location:)
  absolute = URI.parse(URI.join(base_uri.to_s, location.to_s).to_s)
  http_like = absolute.is_a?(URI::HTTP) || absolute.is_a?(URI::HTTPS)
  http_like ? absolute.to_s : nil
rescue URI::InvalidURIError, ArgumentError
  nil
end
-
-
# Maps a MIME content type to a file extension; unknown types fall back to
# the generic "bin". Checks run in the original priority order.
def extension_for_content_type(content_type:)
  case content_type
  when /jpeg/      then "jpg"
  when /png/       then "png"
  when /webp/      then "webp"
  when /gif/       then "gif"
  when /mp4/       then "mp4"
  when /quicktime/ then "mov"
  else "bin"
  end
end
-
-
# Verifies that a storage blob's on-disk file (when the service exposes a
# local path) matches its recorded byte size. Returns { valid:, reason: }
# and never raises — any unexpected error becomes an invalid verdict.
def blob_integrity_for(blob)
  return { valid: false, reason: "missing_blob" } unless blob

  declared_size = blob.byte_size.to_i
  return { valid: false, reason: "non_positive_byte_size" } unless declared_size.positive?

  service = blob.service
  # Only disk-backed services expose a (private) path_for to inspect.
  if service.respond_to?(:path_for, true)
    disk_path = service.send(:path_for, blob.key)
    unless disk_path && File.exist?(disk_path)
      return { valid: false, reason: "missing_file_on_disk" }
    end

    actual_size = File.size(disk_path)
    return { valid: false, reason: "zero_byte_file" } unless actual_size.positive?
    return { valid: false, reason: "byte_size_mismatch" } if declared_size.positive? && actual_size != declared_size
  end

  { valid: true, reason: nil }
rescue StandardError => e
  { valid: false, reason: "integrity_check_error: #{e.class}" }
end
-
-
# Heuristic detector for an HTML page masquerading as media: inspects the
# first 4 KB (case-insensitively) for an <html> tag or an HTML doctype.
def html_payload?(body)
  head = body.to_s.byteslice(0, 4096).to_s.downcase
  return true if head.start_with?("<!doctype html")

  head.include?("<html")
end
-
-
# Validates the payload's magic bytes against the declared content type.
# Raises RuntimeError when the leading bytes don't match a recognized
# signature; silently accepts blank, octet-stream, and unknown types.
def validate_known_signature!(body:, content_type:)
  type = content_type.to_s.downcase
  return if type.blank?
  return if type.include?("octet-stream")

  if type.include?("jpeg")
    raise "invalid jpeg signature" unless body.start_with?("\xFF\xD8".b)
  elsif type.include?("png")
    raise "invalid png signature" unless body.start_with?("\x89PNG\r\n\x1A\n".b)
  elsif type.include?("gif")
    gif_ok = body.start_with?("GIF87a".b) || body.start_with?("GIF89a".b)
    raise "invalid gif signature" unless gif_ok
  elsif type.include?("webp")
    # RIFF container with a WEBP fourcc at offset 8.
    webp_ok = body.bytesize >= 12 && body.byteslice(0, 4) == "RIFF" && body.byteslice(8, 4) == "WEBP"
    raise "invalid webp signature" unless webp_ok
  elsif type.start_with?("video/")
    # ISO BMFF (mp4/mov): "ftyp" box at offset 4.
    video_ok = body.bytesize >= 12 && body.byteslice(4, 4) == "ftyp"
    raise "invalid video signature" unless video_ok
  end
end
-
-
# Associates `blob` with the post's `media` attachment. When an attachment
# record already exists it is reused, swapping its blob only if it differs;
# otherwise a fresh attachment is created.
def attach_blob_to_post!(post:, blob:)
  raise "missing blob for attach" unless blob

  existing = post.media.attached? ? post.media.attachment : nil
  if existing
    existing.update!(blob: blob) unless existing.blob_id == blob.id
    return
  end

  post.media.attach(blob)
end
-
-
# Flags profile posts absent from the latest capture as deleted by stamping
# their metadata (idempotent: already-flagged posts are skipped). Returns
# { count:, shortcodes: } describing the posts flagged in this pass.
def mark_missing_posts_as_deleted!(fetched_shortcodes:, synced_at:, sync_source:)
  deleted_shortcodes = []
  vanished_scope = @profile.instagram_profile_posts.where.not(shortcode: fetched_shortcodes.to_a)

  vanished_scope.find_each do |vanished_post|
    next if post_deleted?(vanished_post)

    stamped = vanished_post.metadata.is_a?(Hash) ? vanished_post.metadata.deep_dup : {}
    stamped.merge!(
      "deleted_from_source" => true,
      "deleted_detected_at" => synced_at.utc.iso8601(3),
      "deleted_reason" => "missing_from_latest_capture",
      "source" => sync_source.to_s
    )
    vanished_post.update!(metadata: stamped, last_synced_at: synced_at)
    deleted_shortcodes << vanished_post.shortcode.to_s
  end

  { count: deleted_shortcodes.length, shortcodes: deleted_shortcodes }
end
-
-
# True when the post's metadata carries a truthy "deleted_from_source" flag.
def post_deleted?(post)
  raw = post.metadata
  flag = raw.is_a?(Hash) ? raw["deleted_from_source"] : nil
  ActiveModel::Type::Boolean.new.cast(flag)
end
-
-
# Change-detection fingerprint covering every synced field of a post; two
# equal signatures mean the stored post needs no update.
def post_signature(post)
  extra = post.metadata.is_a?(Hash) ? post.metadata : {}
  deleted_flag = ActiveModel::Type::Boolean.new.cast(extra["deleted_from_source"])

  {
    shortcode: post.shortcode.to_s,
    taken_at: post.taken_at&.utc&.iso8601(3),
    caption: post.caption.to_s,
    permalink: post.permalink.to_s,
    source_media_url: post.source_media_url.to_s,
    likes_count: post.likes_count.to_i,
    comments_count: post.comments_count.to_i,
    media_url_fingerprint: post.media_url_fingerprint.to_s,
    media_id: extra["media_id"].to_s,
    media_type: extra["media_type"].to_s,
    deleted_from_source: deleted_flag
  }
end
-
-
# Fingerprint restricted to the fields whose change should trigger AI
# re-analysis of the post (subset of #post_signature).
def post_analysis_signature(post)
  extra = post.metadata.is_a?(Hash) ? post.metadata : {}

  {
    shortcode: post.shortcode.to_s,
    taken_at: post.taken_at&.utc&.iso8601(3),
    caption: post.caption.to_s,
    source_media_url: post.source_media_url.to_s,
    media_url_fingerprint: post.media_url_fingerprint.to_s,
    media_id: extra["media_id"].to_s,
    media_type: extra["media_type"].to_s
  }
end
-
end
-
end
-
module Instagram
  # Policy object deciding whether an Instagram profile should be scanned and
  # whether its posts should receive AI analysis. A profile is skipped when it
  # exceeds a follower ceiling, carries an explicit exclusion tag, or looks
  # like a non-personal page (meme/news/brand-style account).
  class ProfileScanPolicy
    DEFAULT_MAX_FOLLOWERS = 20_000
    EXCLUDED_SCAN_TAG = "profile_scan_excluded".freeze
    # Tags marking the profile as a known person; they disable the
    # non-personal-page heuristics entirely.
    PERSONAL_OVERRIDE_TAGS = %w[personal_user friend female_friend male_friend relative].freeze

    # Keywords anywhere in username/display name/bio/category hinting at a
    # content page rather than a person.
    NON_PERSONAL_PAGE_KEYWORDS = %w[
      meme memes
      quote quotes
      facts fact
      news updates
      media entertainment
      viral humor funny
      giveaway deals
      shop store brand
      fanpage
    ].freeze

    # Stronger hints when found in the username or display name itself.
    NON_PERSONAL_PAGE_USERNAME_HINTS = %w[
      official
      store shop brand media news magazine
      fanpage memes meme quotes facts
      deals promo sale
      clips reposts updates daily
      business company agency studio
    ].freeze

    # Call-to-action phrases typical of commercial/brand bios.
    NON_PERSONAL_BIO_CTA_HINTS = [
      "link in bio",
      "dm for collab",
      "dm for promo",
      "for business inquiries",
      "order now",
      "shop now",
      "customer care",
      "whatsapp",
      "telegram",
      "booking"
    ].freeze

    # Instagram category names that identify non-personal accounts.
    NON_PERSONAL_CATEGORY_PATTERN = /\b(media|news|entertainment|publisher|brand|store|shop|business|company|organization|community|product\/service)\b/.freeze

    # Follower ceiling: app-configured value when positive, else the default.
    def self.max_followers_threshold
      configured = Rails.application.config.x.instagram.profile_scan_max_followers
      value = parse_integer(configured)
      return value if value.to_i.positive?

      DEFAULT_MAX_FOLLOWERS
    end

    # Convenience check for an already-persisted profile (no fresh details).
    # Any evaluation error is treated as "do not skip".
    def self.skip_from_cached_profile?(profile:)
      decision = new(profile: profile).decision
      ActiveModel::Type::Boolean.new.cast(decision[:skip_scan])
    rescue StandardError
      false
    end

    # Normalizes a decision hash into the JSON payload stored on a post when
    # its analysis is skipped by policy. Accepts symbol- or string-keyed input
    # and fills defaults for missing fields; nil entries are compacted away.
    def self.build_skip_post_analysis_payload(decision:)
      data = decision.is_a?(Hash) ? decision : {}
      {
        "skipped" => true,
        "policy" => "profile_scan_policy_v1",
        "reason_code" => data[:reason_code].to_s.presence || data["reason_code"].to_s.presence || "scan_policy_blocked",
        "reason" => data[:reason].to_s.presence || data["reason"].to_s.presence || "Post analysis skipped by profile scan policy.",
        "followers_count" => parse_integer(data[:followers_count] || data["followers_count"]),
        "max_allowed_followers" => parse_integer(data[:max_followers] || data["max_followers"]) || max_followers_threshold,
        "decided_at" => Time.current.iso8601
      }.compact
    end

    # Marks a post as "analyzed" with the policy-skip payload merged into its
    # existing analysis hash, so it will not be re-queued for AI analysis.
    def self.mark_post_analysis_skipped!(post:, decision:)
      payload = build_skip_post_analysis_payload(decision: decision)
      existing = post.analysis.is_a?(Hash) ? post.analysis.deep_dup : {}
      post.update!(
        ai_status: "analyzed",
        analyzed_at: Time.current,
        ai_provider: "policy",
        ai_model: "profile_scan_policy_v1",
        analysis: existing.merge(payload)
      )
    end

    # Adds the exclusion tag to the profile (idempotent).
    def self.mark_scan_excluded!(profile:)
      tag = ProfileTag.find_or_create_by!(name: EXCLUDED_SCAN_TAG)
      return if profile.profile_tags.exists?(id: tag.id)

      profile.profile_tags << tag
    end

    # Removes the exclusion tag from the profile, if it is attached.
    def self.clear_scan_excluded!(profile:)
      tag = ProfileTag.find_by(name: EXCLUDED_SCAN_TAG)
      return unless tag

      profile.profile_tags.destroy(tag) if profile.profile_tags.exists?(id: tag.id)
    end

    # @param profile [Object, nil] persisted profile record
    # @param profile_details [Hash, nil] freshly fetched attributes
    #   (symbolized on ingest); takes precedence over the stored profile
    # @param max_followers [Integer, nil] per-call override of the ceiling
    def initialize(profile:, profile_details: nil, max_followers: nil)
      @profile = profile
      @profile_details = profile_details.is_a?(Hash) ? profile_details.deep_symbolize_keys : {}
      @max_followers = self.class.parse_integer(max_followers) || self.class.max_followers_threshold
    end

    # Memoized decision hash; see #evaluate for the rule ordering.
    def decision
      @decision ||= evaluate
    end

    private

    # Rules run in priority order: follower ceiling, explicit exclusion tag,
    # then the non-personal-page heuristics; otherwise the scan is allowed.
    def evaluate
      followers_count = resolved_followers_count
      if followers_count.to_i.positive? && followers_count > @max_followers.to_i
        return build_decision(
          skip_scan: true,
          skip_post_analysis: true,
          reason_code: "followers_threshold_exceeded",
          reason: "followers_count #{followers_count} exceeds max allowed #{@max_followers}.",
          followers_count: followers_count,
          max_followers: @max_followers
        )
      end

      if scan_excluded_tagged?
        return build_decision(
          skip_scan: true,
          skip_post_analysis: true,
          reason_code: "scan_excluded_tag",
          reason: "Profile tagged as scan-excluded.",
          followers_count: followers_count,
          max_followers: @max_followers
        )
      end

      if non_personal_page?
        return build_decision(
          skip_scan: true,
          skip_post_analysis: true,
          reason_code: "non_personal_profile_page",
          reason: "Profile appears to be a non-personal page (meme/news/info style).",
          followers_count: followers_count,
          max_followers: @max_followers
        )
      end

      build_decision(
        skip_scan: false,
        skip_post_analysis: false,
        reason_code: "scan_allowed",
        reason: "Profile eligible for scan and post analysis.",
        followers_count: followers_count,
        max_followers: @max_followers
      )
    end

    # Normalizes all decision fields into a consistently-typed hash.
    def build_decision(skip_scan:, skip_post_analysis:, reason_code:, reason:, followers_count:, max_followers:)
      {
        skip_scan: ActiveModel::Type::Boolean.new.cast(skip_scan),
        skip_post_analysis: ActiveModel::Type::Boolean.new.cast(skip_post_analysis),
        reason_code: reason_code.to_s,
        reason: reason.to_s,
        followers_count: followers_count,
        max_followers: max_followers.to_i
      }
    end

    # Freshly fetched details win over the stored profile; 0 when unknown.
    def resolved_followers_count
      from_details = self.class.parse_integer(@profile_details[:followers_count])
      return from_details if from_details.to_i.positive?

      from_profile = self.class.parse_integer(@profile&.followers_count)
      return from_profile if from_profile.to_i.positive?

      0
    end

    def scan_excluded_tagged?
      profile_tag_names.include?(EXCLUDED_SCAN_TAG)
    end

    # Scores keyword/CTA/category signals across the profile's text fields.
    # Thresholds below are heuristic; business/professional accounts need
    # fewer corroborating hits than plain accounts.
    def non_personal_page?
      return false if personal_override_tagged?

      combined = [
        @profile&.username,
        @profile&.display_name,
        @profile&.bio,
        @profile_details[:username],
        @profile_details[:display_name],
        @profile_details[:bio],
        @profile_details[:category_name]
      ].map(&:to_s).join(" ").downcase
      return false if combined.blank?

      keyword_hits = NON_PERSONAL_PAGE_KEYWORDS.count { |keyword| combined.include?(keyword) }
      username_blob = [ @profile&.username, @profile_details[:username], @profile&.display_name, @profile_details[:display_name] ].map(&:to_s).join(" ").downcase
      username_hits = NON_PERSONAL_PAGE_USERNAME_HINTS.count { |keyword| username_blob.include?(keyword) }
      bio_blob = [ @profile&.bio, @profile_details[:bio] ].map(&:to_s).join(" ").downcase
      cta_hits = NON_PERSONAL_BIO_CTA_HINTS.count { |keyword| bio_blob.include?(keyword) }
      category = @profile_details[:category_name].to_s.downcase
      business = ActiveModel::Type::Boolean.new.cast(@profile_details[:is_business_account])
      professional = ActiveModel::Type::Boolean.new.cast(@profile_details[:is_professional_account])
      verified = ActiveModel::Type::Boolean.new.cast(@profile_details[:is_verified])
      business_like = business || professional
      has_external_link = @profile_details[:external_url].to_s.present?

      # Any one matching rule classifies the profile as a non-personal page.
      return true if business_like && category.match?(NON_PERSONAL_CATEGORY_PATTERN)
      return true if business_like && (keyword_hits + username_hits + cta_hits >= 2)
      return true if category.match?(NON_PERSONAL_CATEGORY_PATTERN) && (keyword_hits + username_hits >= 2)
      return true if keyword_hits >= 3
      return true if username_hits >= 2 && (cta_hits.positive? || has_external_link)
      return true if verified && category.match?(NON_PERSONAL_CATEGORY_PATTERN) && keyword_hits.positive?

      false
    end

    def personal_override_tagged?
      profile_tag_names.any? { |name| PERSONAL_OVERRIDE_TAGS.include?(name) }
    end

    # Tag names for the profile, memoized; uses the loaded association when
    # available to avoid a query. Errors (and a nil profile) yield [].
    # NOTE(review): the rescue path returns [] without memoizing, so a failing
    # lookup is retried on every call — appears intentional (best-effort).
    def profile_tag_names
      @profile_tag_names ||= begin
        return [] unless @profile

        if @profile.association(:profile_tags).loaded?
          @profile.profile_tags.map { |tag| tag.name.to_s }
        else
          @profile.profile_tags.pluck(:name)
        end
      end
    rescue StandardError
      []
    end

    # Strict integer parsing: only optionally-signed digit strings (or
    # integer-like values) are accepted; everything else returns nil.
    def self.parse_integer(value)
      return nil if value.nil?

      text = value.to_s.strip
      return nil if text.blank?
      return nil unless text.match?(/\A-?\d+\z/)

      text.to_i
    rescue StandardError
      nil
    end
  end
end
-
module InstagramAccounts
  # Aggregates everything the account dashboard renders in one pass: open
  # issues, metrics, the latest sync run, recent job failures, audit entries,
  # the actions to-do queue, and skip diagnostics.
  class DashboardSnapshotService
    DEFAULT_FAILURE_LIMIT = 25
    DEFAULT_AUDIT_LIMIT = 120
    DEFAULT_ACTION_LIMIT = 20
    DEFAULT_SKIP_WINDOW_HOURS = 72

    # All limits are clamped into server-side bounds so callers cannot
    # request unbounded result sets.
    def initialize(
      account:,
      failure_limit: DEFAULT_FAILURE_LIMIT,
      audit_limit: DEFAULT_AUDIT_LIMIT,
      action_limit: DEFAULT_ACTION_LIMIT,
      skip_window_hours: DEFAULT_SKIP_WINDOW_HOURS
    )
      @account = account
      @failure_limit = failure_limit.to_i.clamp(1, 200)
      @audit_limit = audit_limit.to_i.clamp(1, 500)
      @action_limit = action_limit.to_i.clamp(1, 120)
      @skip_window_hours = skip_window_hours.to_i.clamp(1, 168)
    end

    # Builds the snapshot hash consumed by the dashboard view.
    def call
      {
        issues: Ops::AccountIssues.for(account),
        metrics: Ops::Metrics.for_account(account),
        latest_sync_run: account.sync_runs.order(created_at: :desc).first,
        recent_failures: recent_failures,
        recent_audit_entries: Ops::AuditLogBuilder.for_account(instagram_account: account, limit: audit_limit),
        actions_todo_queue: actions_todo_queue_summary,
        skip_diagnostics: skip_diagnostics
      }
    end

    private

    attr_reader :account, :failure_limit, :audit_limit, :action_limit, :skip_window_hours

    # Most recent background-job failures scoped to this account.
    def recent_failures
      BackgroundJobFailure
        .where(instagram_account_id: account.id)
        .order(occurred_at: :desc, id: :desc)
        .limit(failure_limit)
    end

    # Fetches the actions queue (also enqueues ready items for processing).
    # Best-effort: on any error returns an empty summary carrying the message
    # so the dashboard can render a degraded state instead of crashing.
    def actions_todo_queue_summary
      Workspace::ActionsTodoQueueService.new(
        account: account,
        limit: action_limit,
        enqueue_processing: true
      ).fetch!
    rescue StandardError => e
      {
        items: [],
        stats: {
          total_items: 0,
          ready_items: 0,
          processing_items: 0,
          enqueued_now: 0,
          refreshed_at: Time.current.iso8601(3),
          error: e.message.to_s
        }
      }
    end

    # Skip/failure breakdown over the configured trailing window.
    def skip_diagnostics
      SkipDiagnosticsService.new(account: account, hours: skip_window_hours).call
    end
  end
end
-
module InstagramAccounts
  # Handles a dashboard request to generate (or report the status of) an LLM
  # comment for a story-archive event. Returns a Result whose payload and
  # status map directly onto the JSON response body and HTTP status code.
  class LlmCommentRequestService
    Result = Struct.new(:payload, :status, keyword_init: true)

    # @param account [Object] account used for the access check
    # @param event_id [Integer] InstagramProfileEvent id
    # @param provider [#to_s] LLM provider name passed through to the job
    # @param model [Object, nil] model identifier passed through to the job
    # @param status_only [Object] truthy => only report status, never enqueue
    # @param queue_inspector [LlmQueueInspector] Sidekiq probe (injectable)
    def initialize(account:, event_id:, provider:, model:, status_only:, queue_inspector: LlmQueueInspector.new)
      @account = account
      @event_id = event_id
      @provider = provider.to_s
      @model = model
      @status_only = ActiveModel::Type::Boolean.new.cast(status_only)
      @queue_inspector = queue_inspector
    end

    # Flow: verify access; short-circuit when a comment already exists; heal
    # stalled in-progress jobs; report status when status_only; otherwise
    # enqueue a generation job. Any error becomes :unprocessable_entity.
    def call
      event = InstagramProfileEvent.find(event_id)
      return not_found_result unless accessible_event?(event)

      if event.has_llm_generated_comment?
        # Heal a stale status flag when the comment is already present.
        event.update_column(:llm_comment_status, "completed") if event.llm_comment_status.to_s != "completed"
        return completed_result(event)
      end

      if event.llm_comment_in_progress?
        if queue_inspector.stale_comment_job?(event: event)
          # The tracked job can no longer be found in Sidekiq: mark the event
          # failed so the user can retry instead of waiting forever.
          event.update_columns(
            llm_comment_status: "failed",
            llm_comment_last_error: "Previous generation job appears stalled. Please retry.",
            updated_at: Time.current
          )
          event.reload
        else
          return in_progress_result(event)
        end
      end

      return status_result(event) if status_only

      job = GenerateLlmCommentJob.perform_later(
        instagram_profile_event_id: event.id,
        provider: provider,
        model: model,
        requested_by: "dashboard_manual_request"
      )
      event.queue_llm_comment_generation!(job_id: job.job_id)

      Result.new(
        payload: {
          success: true,
          status: "queued",
          event_id: event.id,
          job_id: job.job_id,
          estimated_seconds: llm_comment_estimated_seconds(event: event, include_queue: true),
          queue_size: ai_queue_size
        },
        status: :accepted
      )
    rescue StandardError => e
      Result.new(payload: { error: e.message }, status: :unprocessable_entity)
    end

    private

    attr_reader :account, :event_id, :provider, :model, :status_only, :queue_inspector

    # Only story-archive events belonging to this account are accessible.
    def accessible_event?(event)
      event.story_archive_item? && event.instagram_profile&.instagram_account_id == account.id
    end

    def not_found_result
      Result.new(payload: { error: "Event not found or not accessible" }, status: :not_found)
    end

    # 200 payload carrying the finished comment and its generation metadata.
    def completed_result(event)
      Result.new(
        payload: {
          success: true,
          status: "completed",
          event_id: event.id,
          llm_generated_comment: event.llm_generated_comment,
          llm_comment_generated_at: event.llm_comment_generated_at,
          llm_comment_model: event.llm_comment_model,
          llm_comment_provider: event.llm_comment_provider,
          llm_comment_relevance_score: event.llm_comment_relevance_score
        },
        status: :ok
      )
    end

    # 202 payload while a generation job is still tracked in the queue.
    def in_progress_result(event)
      Result.new(
        payload: {
          success: true,
          status: event.llm_comment_status,
          event_id: event.id,
          job_id: event.llm_comment_job_id,
          estimated_seconds: llm_comment_estimated_seconds(event: event),
          queue_size: ai_queue_size
        },
        status: :accepted
      )
    end

    # 200 payload for status-only polling (nothing enqueued).
    def status_result(event)
      Result.new(
        payload: {
          success: true,
          status: event.llm_comment_status.presence || "not_requested",
          event_id: event.id,
          estimated_seconds: llm_comment_estimated_seconds(event: event),
          queue_size: ai_queue_size
        },
        status: :ok
      )
    end

    # Rough ETA in seconds: base cost plus queue depth, prior attempts, and a
    # penalty when the story still needs local-context preprocessing; clamped
    # to a 10..240s range.
    def llm_comment_estimated_seconds(event:, include_queue: false)
      base = 18
      queue_factor = include_queue ? (ai_queue_size * 4) : 0
      attempt_factor = event.llm_comment_attempts.to_i * 6
      preprocess_factor = story_local_context_preprocess_penalty(event: event)
      (base + queue_factor + attempt_factor + preprocess_factor).clamp(10, 240)
    end

    # Extra time when no precomputed story context (local intelligence, OCR
    # text, or content signals) exists yet; images incur a larger penalty
    # than other media. Best-effort: any error yields 0.
    def story_local_context_preprocess_penalty(event:)
      metadata = event.metadata.is_a?(Hash) ? event.metadata : {}
      has_context = metadata["local_story_intelligence"].is_a?(Hash) ||
        metadata["ocr_text"].to_s.present? ||
        Array(metadata["content_signals"]).any?
      return 0 if has_context

      media_type = event.media&.blob&.content_type.to_s.presence || metadata["media_content_type"].to_s
      media_type.start_with?("image/") ? 16 : 8
    rescue StandardError
      0
    end

    def ai_queue_size
      queue_inspector.queue_size
    end
  end
end
-
module InstagramAccounts
  # Peeks into Sidekiq to size the "ai" queue and to detect stalled LLM
  # comment-generation jobs. All inspection is best-effort: any failure is
  # reported as "not stale" / queue size zero.
  class LlmQueueInspector
    STALE_AFTER = 5.minutes

    # Number of jobs currently waiting in the "ai" queue (0 unless the app
    # runs on the Sidekiq adapter, or on any inspection error).
    def queue_size
      return 0 unless sidekiq_adapter?

      require "sidekiq/api"
      Sidekiq::Queue.new("ai").size.to_i
    rescue StandardError
      0
    end

    # True when the event claims a comment job is in progress, the event has
    # not been touched within STALE_AFTER, and the job can no longer be found
    # running, queued, retrying, or scheduled anywhere in Sidekiq.
    def stale_comment_job?(event:)
      return false unless event.llm_comment_in_progress?
      return false if event.updated_at && event.updated_at > STALE_AFTER.ago
      return false unless sidekiq_adapter?

      require "sidekiq/api"
      job_id = event.llm_comment_job_id.to_s
      event_marker = "instagram_profile_event_id\"=>#{event.id}"

      still_tracked =
        currently_busy?(job_id: job_id, event_marker: event_marker) ||
        queued?(job_id: job_id, event_marker: event_marker) ||
        retrying?(job_id: job_id, event_marker: event_marker) ||
        scheduled?(job_id: job_id, event_marker: event_marker)

      !still_tracked
    rescue StandardError
      false
    end

    private

    def sidekiq_adapter?
      Rails.application.config.active_job.queue_adapter.to_s == "sidekiq"
    end

    # A serialized job payload matches when it contains either the ActiveJob
    # job id or the event-id marker string.
    def payload_match?(payload, job_id, event_marker)
      payload.include?(job_id) || payload.include?(event_marker)
    end

    def currently_busy?(job_id:, event_marker:)
      Sidekiq::Workers.new.any? do |_pid, _tid, work|
        payload_match?(work["payload"].to_s, job_id, event_marker)
      end
    end

    def queued?(job_id:, event_marker:)
      Sidekiq::Queue.new("ai").any? do |job|
        payload_match?(job.item.to_s, job_id, event_marker)
      end
    end

    def retrying?(job_id:, event_marker:)
      Sidekiq::RetrySet.new.any? do |job|
        payload_match?(job.item.to_s, job_id, event_marker)
      end
    end

    def scheduled?(job_id:, event_marker:)
      Sidekiq::ScheduledSet.new.any? do |job|
        payload_match?(job.item.to_s, job_id, event_marker)
      end
    end
  end
end
-
module InstagramAccounts
  # Summarizes recent story skip/failure events for an account over a trailing
  # window, grouped by reason and classified as expected ("valid") versus
  # needing human attention ("review").
  class SkipDiagnosticsService
    # Reasons representing expected, by-design skips.
    VALID_REASONS = %w[
      profile_not_in_network
      duplicate_story_already_replied
      invalid_story_media
      interaction_retry_window_active
      missing_auto_reply_tag
      external_profile_link_detected
    ].freeze

    # Reasons suggesting an automation defect worth investigating.
    REVIEW_REASONS = %w[
      reply_box_not_found
      comment_submit_failed
      next_navigation_failed
      story_context_missing
      reply_precheck_error
    ].freeze

    def initialize(account:, hours:)
      @account = account
      @hours = hours.to_i
    end

    # Returns { window_hours:, total:, by_reason: } — best-effort: any error
    # yields an empty summary for the same window.
    def call
      events = base_scope
      counts = Hash.new(0)

      # Cap iteration at 5k rows to bound memory/time on busy accounts.
      events.limit(5_000).each do |event|
        counts[reason_for(event)] += 1
      end

      {
        window_hours: hours,
        total: events.count,
        by_reason: build_reasons(counts)
      }
    rescue StandardError
      { window_hours: hours, total: 0, by_reason: [] }
    end

    private

    attr_reader :account, :hours

    # Prefer the explicit metadata reason, then the event kind, else unknown.
    def reason_for(event)
      meta = event.metadata.is_a?(Hash) ? event.metadata : {}
      meta["reason"].to_s.presence || event.kind.to_s.presence || "unknown"
    end

    # Skip/failure events for this account inside the trailing window.
    def base_scope
      InstagramProfileEvent
        .joins(:instagram_profile)
        .where(instagram_profiles: { instagram_account_id: account.id })
        .where(kind: %w[story_reply_skipped story_sync_failed story_ad_skipped])
        .where("detected_at >= ?", hours.hours.ago)
    end

    # Rows sorted by descending count, each with a classification.
    def build_reasons(counts)
      counts.sort_by { |_reason, count| -count }.map do |reason, count|
        {
          reason: reason,
          count: count.to_i,
          classification: classification_for(reason)
        }
      end
    end

    # Known lists take precedence; ad/sponsored-looking reasons are treated
    # as valid; anything unrecognized defaults to review.
    def classification_for(reason)
      return "valid" if VALID_REASONS.include?(reason)
      return "review" if REVIEW_REASONS.include?(reason)

      reason.match?(/ad|sponsored/) ? "valid" : "review"
    end
  end
end
-
module InstagramAccounts
  # Serializes a story-archive InstagramProfileEvent into the JSON shape the
  # story archive UI consumes: media URLs, profile info, timestamps, skip
  # flags, LLM comment state, and ownership classification.
  class StoryArchiveItemSerializer
    # Throttle window for re-enqueueing preview generation for the same event.
    DEFAULT_PREVIEW_ENQUEUE_TTL_SECONDS = Integer(ENV.fetch("STORY_ARCHIVE_PREVIEW_ENQUEUE_TTL_SECONDS", "900"))

    def initialize(event:, preview_enqueue_ttl_seconds: DEFAULT_PREVIEW_ENQUEUE_TTL_SECONDS)
      @event = event
      @preview_enqueue_ttl_seconds = preview_enqueue_ttl_seconds.to_i
    end

    # Builds the serialized hash. Falls back gracefully when metadata keys or
    # the media blob are missing.
    def call
      metadata = event.metadata.is_a?(Hash) ? event.metadata : {}
      llm_meta = event.llm_comment_metadata.is_a?(Hash) ? event.llm_comment_metadata : {}
      ownership_data = extract_ownership_data(metadata: metadata, llm_meta: llm_meta)
      blob = event.media.blob
      profile = event.instagram_profile
      # NOTE(review): "upload_time"/"taken_at" appear to be the story's
      # original post time from capture metadata — confirm against producer.
      story_posted_at = metadata["upload_time"].presence || metadata["taken_at"].presence
      downloaded_at = metadata["downloaded_at"].presence || event.occurred_at&.iso8601

      {
        id: event.id,
        profile_id: event.instagram_profile_id,
        profile_username: profile&.username.to_s,
        profile_display_name: profile&.display_name.to_s.presence || profile&.username.to_s,
        profile_avatar_url: profile_avatar_url(profile),
        app_profile_url: event.instagram_profile_id ? Rails.application.routes.url_helpers.instagram_profile_path(event.instagram_profile_id) : nil,
        instagram_profile_url: profile&.username.present? ? "https://www.instagram.com/#{profile.username}/" : nil,
        story_posted_at: story_posted_at,
        downloaded_at: downloaded_at,
        media_url: blob_path(event.media),
        media_download_url: blob_path(event.media, disposition: "attachment"),
        media_content_type: blob&.content_type.to_s.presence || metadata["media_content_type"].to_s,
        media_preview_image_url: media_preview_image_url(metadata: metadata),
        video_static_frame_only: StoryArchive::MediaPreviewResolver.static_video_preview?(metadata: metadata),
        # Prefer the recorded byte count; fall back to the blob's size.
        media_bytes: metadata["media_bytes"].to_i.positive? ? metadata["media_bytes"].to_i : blob&.byte_size.to_i,
        media_width: metadata["media_width"],
        media_height: metadata["media_height"],
        story_id: metadata["story_id"].to_s,
        story_url: metadata["story_url"].to_s.presence || metadata["permalink"].to_s.presence,
        reply_comment: metadata["reply_comment"].to_s.presence,
        skipped: ActiveModel::Type::Boolean.new.cast(metadata["skipped"]),
        skip_reason: metadata["skip_reason"].to_s.presence,
        llm_generated_comment: event.llm_generated_comment,
        llm_comment_generated_at: event.llm_comment_generated_at&.iso8601,
        llm_comment_model: event.llm_comment_model,
        llm_comment_provider: event.llm_comment_provider,
        llm_comment_status: event.llm_comment_status,
        llm_comment_attempts: event.llm_comment_attempts,
        llm_comment_last_error: event.llm_comment_last_error,
        llm_comment_last_error_preview: text_preview(event.llm_comment_last_error, max: 180),
        llm_comment_relevance_score: event.llm_comment_relevance_score,
        llm_generated_comment_preview: text_preview(event.llm_generated_comment, max: 260),
        has_llm_comment: event.has_llm_generated_comment?,
        story_ownership_label: ownership_data["label"].to_s.presence,
        story_ownership_summary: ownership_data["summary"].to_s.presence,
        story_ownership_confidence: ownership_data["confidence"]
      }
    end

    private

    attr_reader :event, :preview_enqueue_ttl_seconds

    # Finds the ownership classification hash in priority order: LLM comment
    # metadata, then event metadata, then validated story insights; {} if none.
    def extract_ownership_data(metadata:, llm_meta:)
      if llm_meta["ownership_classification"].is_a?(Hash)
        llm_meta["ownership_classification"]
      elsif metadata["story_ownership_classification"].is_a?(Hash)
        metadata["story_ownership_classification"]
      elsif metadata.dig("validated_story_insights", "ownership_classification").is_a?(Hash)
        metadata.dig("validated_story_insights", "ownership_classification")
      else
        {}
      end
    end

    # Relative ActiveStorage blob URL for the attachment; nil on any error
    # (e.g. nothing attached).
    def blob_path(attachment, disposition: nil)
      options = { only_path: true }
      options[:disposition] = disposition if disposition.present?
      Rails.application.routes.url_helpers.rails_blob_path(attachment, **options)
    rescue StandardError
      nil
    end

    # Locally stored avatar wins over the remote profile_pic_url.
    def profile_avatar_url(profile)
      return nil unless profile

      if profile.avatar.attached?
        blob_path(profile.avatar)
      else
        profile.profile_pic_url.to_s.presence
      end
    end

    # Resolver-provided preview first; otherwise kick off (throttled) local
    # preview generation for videos and return nil for now.
    def media_preview_image_url(metadata:)
      url = StoryArchive::MediaPreviewResolver.preferred_preview_image_url(event: event, metadata: metadata)
      return url if url.present?

      local_video_preview_representation_url
    end

    # For video media without a preview yet: enqueue generation and return
    # nil (the UI renders without a poster until the job finishes).
    def local_video_preview_representation_url
      return nil unless event.media.attached?
      return nil unless event.media.blob&.content_type.to_s.start_with?("video/")

      enqueue_story_preview_generation
      nil
    rescue StandardError
      nil
    end

    # Enqueues the preview job at most once per TTL window per event, using
    # the cache as a cheap lock; failures are logged, never raised.
    def enqueue_story_preview_generation
      return if event.preview_image.attached?

      cache_key = "story_archive:preview_enqueue:#{event.id}"
      Rails.cache.fetch(cache_key, expires_in: preview_enqueue_ttl_seconds.seconds) do
        GenerateStoryPreviewImageJob.perform_later(instagram_profile_event_id: event.id)
        true
      end
    rescue StandardError => e
      Rails.logger.warn("[story_media_archive] preview enqueue failed event_id=#{event.id}: #{e.class}: #{e.message}")
    end

    # Truncates text to `max` characters with a "..." suffix when needed.
    def text_preview(raw, max:)
      text = raw.to_s
      return text if text.length <= max

      "#{text[0, max]}..."
    end
  end
end
-
module InstagramAccounts
  # Paginated query over an account's archived story events, optionally
  # restricted to a single calendar day.
  class StoryArchiveQuery
    DEFAULT_PER_PAGE = 12
    MIN_PER_PAGE = 8
    MAX_PER_PAGE = 40

    Result = Struct.new(:events, :page, :per_page, :total, :has_more, :on, keyword_init: true)

    def initialize(account:, page:, per_page:, on: nil)
      @account = account
      @page = page.to_i
      @per_page = per_page.to_i
      @raw_on = on
    end

    # Runs the query and returns a Result carrying one page of events plus
    # pagination bookkeeping and the (parsed) day filter actually applied.
    def call
      day_filter = parse_archive_date(raw_on)
      current_page = page < 1 ? 1 : page
      size = normalize_per_page

      relation = base_scope
      if day_filter
        # Effective event date: occurred_at, else detected_at, else created_at.
        relation = relation.where(
          "DATE(COALESCE(instagram_profile_events.occurred_at, instagram_profile_events.detected_at, instagram_profile_events.created_at)) = ?",
          day_filter
        )
      end
      relation = relation.order(detected_at: :desc, id: :desc)

      total_count = relation.count
      page_of_events = relation.offset((current_page - 1) * size).limit(size)

      Result.new(
        events: page_of_events,
        page: current_page,
        per_page: size,
        total: total_count,
        has_more: current_page * size < total_count,
        on: day_filter
      )
    end

    private

    attr_reader :account, :page, :per_page, :raw_on

    # Story-archive events for this account, with profile and media preloaded.
    def base_scope
      InstagramProfileEvent
        .joins(:instagram_profile)
        .joins(:media_attachment)
        .includes(:instagram_profile)
        .with_attached_media
        .with_attached_preview_image
        .where(
          instagram_profiles: { instagram_account_id: account.id },
          kind: InstagramProfileEvent::STORY_ARCHIVE_EVENT_KINDS
        )
    end

    # Non-positive values fall back to the default; result is clamped.
    def normalize_per_page
      requested = per_page
      requested = DEFAULT_PER_PAGE unless requested.positive?
      requested.clamp(MIN_PER_PAGE, MAX_PER_PAGE)
    end

    # Strict ISO-8601 date parsing; anything unparsable yields nil (no filter).
    def parse_archive_date(raw)
      text = raw.to_s.strip
      return nil if text.empty?

      Date.iso8601(text)
    rescue StandardError
      nil
    end
  end
end
-
module InstagramAccounts
  # Builds the "technical details" JSON payload for an event's LLM comment:
  # comment state, a story timeline, and the (possibly re-hydrated) technical
  # breakdown of how the comment was generated.
  class TechnicalDetailsPayloadService
    Result = Struct.new(:payload, :status, keyword_init: true)

    def initialize(account:, event_id:)
      @account = account
      @event_id = event_id
    end

    # Returns a Result (:ok / :not_found / :unprocessable_entity) whose
    # payload maps directly onto the JSON response.
    def call
      event = InstagramProfileEvent.find(event_id)
      return not_found_result unless event.instagram_profile&.instagram_account_id == account.id

      llm_meta = event.llm_comment_metadata.is_a?(Hash) ? event.llm_comment_metadata : {}
      # Stored details may use string or symbol keys depending on writer.
      stored_details = llm_meta["technical_details"] || llm_meta[:technical_details]
      technical_details = hydrate_technical_details(event: event, technical_details: stored_details)

      Result.new(
        payload: {
          event_id: event.id,
          has_llm_comment: event.has_llm_generated_comment?,
          llm_comment: event.llm_generated_comment,
          generated_at: event.llm_comment_generated_at,
          model: event.llm_comment_model,
          provider: event.llm_comment_provider,
          status: event.llm_comment_status,
          relevance_score: event.llm_comment_relevance_score,
          last_error: event.llm_comment_last_error,
          timeline: story_timeline_for(event: event),
          technical_details: technical_details
        },
        status: :ok
      )
    rescue StandardError => e
      Result.new(payload: { error: e.message }, status: :unprocessable_entity)
    end

    private

    attr_reader :account, :event_id

    def not_found_result
      Result.new(payload: { error: "Event not found or not accessible" }, status: :not_found)
    end

    # Returns the stored details when all required sections are present;
    # otherwise regenerates them from the event (via its private context
    # builders) and merges the stored values on top. Regeneration failures
    # fall back to whatever was stored.
    def hydrate_technical_details(event:, technical_details:)
      current = technical_details.is_a?(Hash) ? technical_details.deep_stringify_keys : {}
      has_required_sections =
        current["local_story_intelligence"].is_a?(Hash) &&
        current["analysis"].is_a?(Hash) &&
        current["prompt_engineering"].is_a?(Hash)
      return current if has_required_sections

      # NOTE(review): reaches into the event's private API via send — keep in
      # sync with InstagramProfileEvent's comment-generation internals.
      context = event.send(:build_comment_context)
      generated = event.send(:capture_technical_details, context)
      generated_hash = generated.is_a?(Hash) ? generated.deep_stringify_keys : {}
      # Stored values win over freshly generated ones.
      generated_hash.deep_merge(current)
    rescue StandardError
      current
    end

    # Timestamps for the UI timeline, each with metadata-first fallbacks.
    def story_timeline_for(event:)
      metadata = event.metadata.is_a?(Hash) ? event.metadata : {}
      story = event.instagram_stories.order(taken_at: :desc, id: :desc).first

      {
        story_posted_at: metadata["upload_time"].presence || metadata["taken_at"].presence || story&.taken_at&.iso8601,
        downloaded_to_system_at: metadata["downloaded_at"].presence || event.occurred_at&.iso8601 || event.created_at&.iso8601,
        event_detected_at: event.detected_at&.iso8601
      }
    end
  end
end
-
module InstagramProfiles
  # Server-side pagination, filtering, and sorting for a profile's event
  # table (driven by Tabulator request parameters).
  class EventsQuery
    DEFAULT_PER_PAGE = 25
    MIN_PER_PAGE = 10
    MAX_PER_PAGE = 100

    Result = Struct.new(:events, :total, :pages, keyword_init: true)

    def initialize(profile:, params:, tabulator: TabulatorParams.new(params: params))
      @profile = profile
      @params = params
      @tabulator = tabulator
    end

    # Applies column filters, free-text search, and sort, then returns one
    # page of events together with total/pages counts.
    def call
      relation = apply_tabulator_event_filters(base_scope)
      relation = apply_query(relation, params[:q].to_s.strip)
      relation = apply_remote_sort(relation) || relation.order(detected_at: :desc, id: :desc)

      current_page = normalize_page(params[:page])
      size = normalize_per_page(params[:per_page].presence || params[:size].presence)
      total_count = relation.count

      Result.new(
        events: relation.offset((current_page - 1) * size).limit(size),
        total: total_count,
        pages: (total_count / size.to_f).ceil
      )
    end

    private

    attr_reader :profile, :params, :tabulator

    def base_scope
      profile.instagram_profile_events.with_attached_media.with_attached_preview_image
    end

    # Only the "kind" column is filterable; matching is case-insensitive LIKE.
    def apply_tabulator_event_filters(relation)
      tabulator.filters.reduce(relation) do |scoped, filter|
        value = filter[:value]
        if value.blank? || filter[:field] != "kind"
          scoped
        else
          scoped.where("LOWER(kind) LIKE ?", "%#{value.to_s.downcase}%")
        end
      end
    end

    # Free-text search across kind and external_id (both case-insensitive).
    def apply_query(relation, query)
      return relation if query.blank?

      pattern = "%#{query.downcase}%"
      relation.where("LOWER(kind) LIKE ? OR LOWER(COALESCE(external_id, '')) LIKE ?", pattern, pattern)
    end

    # Honors the first Tabulator sorter for a whitelisted set of columns;
    # returns nil when no applicable sorter is present so the caller can
    # apply the default ordering.
    def apply_remote_sort(relation)
      sorter = tabulator.sorters.first
      return nil unless sorter.respond_to?(:[])

      direction = sorter["dir"].to_s.downcase == "desc" ? "DESC" : "ASC"

      case sorter["field"].to_s
      when "kind"
        relation.order(Arel.sql("kind #{direction}, detected_at DESC, id DESC"))
      when "occurred_at"
        relation.order(Arel.sql("occurred_at #{direction} NULLS LAST, detected_at DESC, id DESC"))
      when "detected_at"
        relation.order(Arel.sql("detected_at #{direction}, id #{direction}"))
      end
    end

    def normalize_page(raw_page)
      requested = raw_page.to_i
      requested.positive? ? requested : 1
    end

    def normalize_per_page(raw_per_page)
      requested = raw_per_page.to_i
      requested = DEFAULT_PER_PAGE unless requested.positive?
      requested.clamp(MIN_PER_PAGE, MAX_PER_PAGE)
    end
  end
end
-
module InstagramProfiles
  # Resolves a profile's mutual-friends list via the Instagram client,
  # mapping each row onto an existing InstagramProfile record when one is
  # known, or a new (unsaved) record otherwise. Returns [] on any error.
  class MutualFriendsResolver
    def initialize(account:, profile:, client: Instagram::Client.new(account: account))
      @account = account
      @profile = profile
      @client = client
    end

    def call(limit: 36)
      rows = client.fetch_mutual_friends(profile_username: profile.username, limit: limit)
      names = rows.filter_map { |row| normalize_username(row[:username] || row["username"]) }
      known = account.instagram_profiles.where(username: names).with_attached_avatar.index_by(&:username)
      own_username = normalize_username(profile.username)

      rows.filter_map do |row|
        name = normalize_username(row[:username] || row["username"])
        next if name.blank? || name == own_username

        build_entry(row: row, username: name, existing: known[name])
      end
    rescue StandardError => e
      Rails.logger.warn("Failed to resolve mutual friends for profile #{profile&.username}: #{e.class}: #{e.message}")
      []
    end

    private

    attr_reader :account, :profile, :client

    # Backfills blank fields on a known profile, or builds an unsaved record.
    def build_entry(row:, username:, existing:)
      display_name = row[:display_name] || row["display_name"]
      pic_url = row[:profile_pic_url] || row["profile_pic_url"]

      if existing
        existing.display_name = display_name if existing.display_name.blank? && display_name.present?
        existing.profile_pic_url = pic_url if existing.profile_pic_url.blank? && pic_url.present?
        existing
      else
        account.instagram_profiles.new(
          username: username,
          display_name: display_name.presence,
          profile_pic_url: pic_url.presence
        )
      end
    end

    def normalize_username(value)
      value.to_s.strip.downcase
    end
  end
end
-
module InstagramProfiles
  # Paginated, filterable, sortable listing of an account's profiles.
  # Combines Tabulator remote filters/sorters, legacy boolean filter params
  # and a free-text search.
  class ProfilesIndexQuery
    DEFAULT_PER_PAGE = 50
    MIN_PER_PAGE = 10
    MAX_PER_PAGE = 200

    Result = Struct.new(
      :q,
      :filter,
      :page,
      :per_page,
      :total,
      :pages,
      :profiles,
      keyword_init: true
    )

    def initialize(account:, params:, tabulator: TabulatorParams.new(params: params))
      @account = account
      @params = params
      @tabulator = tabulator
    end

    # Returns a Result carrying the echoed query/filter state plus the page
    # of profiles and pagination totals.
    def call
      scope = apply_tabulator_profile_filters(base_scope)
      query = params[:q].to_s.strip
      scope = apply_query(scope, query)

      filter = {
        mutual: tabulator.truthy?(:mutual),
        following: tabulator.truthy?(:following),
        follows_you: tabulator.truthy?(:follows_you),
        can_message: tabulator.truthy?(:can_message)
      }
      scope = apply_filter(scope, filter: filter)
      scope = apply_remote_sort(scope) || apply_sort(scope, params[:sort].to_s)

      page = normalize_page(params[:page])
      per_page = normalize_per_page(params[:per_page].presence || params[:size].presence)
      total = scope.count
      pages = (total / per_page.to_f).ceil
      rows = scope.offset((page - 1) * per_page).limit(per_page)

      Result.new(
        q: query,
        filter: filter,
        page: page,
        per_page: per_page,
        total: total,
        pages: pages,
        profiles: rows
      )
    end

    private

    attr_reader :account, :params, :tabulator

    def base_scope
      account.instagram_profiles
    end

    # Applies Tabulator header filters for the whitelisted columns.
    def apply_tabulator_profile_filters(scope)
      tabulator.filters.each do |filter|
        field = filter[:field]
        value = filter[:value]
        next if value.blank?

        case field
        when "username"
          scope = scope.where("LOWER(username) LIKE ?", like_term(value))
        when "display_name"
          scope = scope.where("LOWER(COALESCE(display_name, '')) LIKE ?", like_term(value))
        when "following"
          parsed = tabulator.parse_tri_bool(value)
          scope = scope.where(following: parsed) unless parsed.nil?
        when "follows_you"
          parsed = tabulator.parse_tri_bool(value)
          scope = scope.where(follows_you: parsed) unless parsed.nil?
        when "mutual"
          parsed = tabulator.parse_tri_bool(value)
          if parsed == true
            scope = scope.where(following: true, follows_you: true)
          elsif parsed == false
            scope = scope.where.not(following: true, follows_you: true)
          end
        when "can_message"
          # "unknown" is a special tri-state value meaning NULL in the DB.
          scope = if value.to_s == "unknown"
            scope.where(can_message: nil)
          else
            parsed = tabulator.parse_tri_bool(value)
            parsed.nil? ? scope : scope.where(can_message: parsed)
          end
        end
      end
      scope
    end

    # Case-insensitive free-text search across username and display_name.
    def apply_query(scope, query)
      return scope if query.blank?

      term = like_term(query)
      scope.where("LOWER(username) LIKE ? OR LOWER(display_name) LIKE ?", term, term)
    end

    # Escapes SQL LIKE wildcards (%, _, \) so user input matches literally.
    # Fixes wildcard injection from raw "%#{value}%" interpolation.
    def like_term(value)
      "%#{ActiveRecord::Base.sanitize_sql_like(value.to_s.downcase)}%"
    end

    def apply_filter(scope, filter:)
      scope = scope.where(following: true, follows_you: true) if filter[:mutual]
      scope = scope.where(following: true) if filter[:following]
      scope = scope.where(follows_you: true) if filter[:follows_you]
      scope = scope.where(can_message: true) if filter[:can_message]
      scope
    end

    # Legacy ?sort= values; the default favors mutual follows first.
    def apply_sort(scope, sort)
      case sort
      when "username_asc"
        scope.order(Arel.sql("username ASC"))
      when "username_desc"
        scope.order(Arel.sql("username DESC"))
      when "recent_sync"
        scope.order(Arel.sql("last_synced_at DESC NULLS LAST, username ASC"))
      when "messageable"
        scope.order(Arel.sql("can_message DESC NULLS LAST, username ASC"))
      when "recent_active"
        scope.order(Arel.sql("last_active_at DESC NULLS LAST, username ASC"))
      else
        scope.order(Arel.sql("following DESC, follows_you DESC, username ASC"))
      end
    end

    # Maps the first Tabulator sorter onto a whitelisted ORDER BY; returns
    # nil for no/unknown sorter so the caller falls back to apply_sort.
    def apply_remote_sort(scope)
      first = tabulator.sorters.first
      return nil unless first.respond_to?(:[])

      field = first["field"].to_s
      dir = first["dir"].to_s.downcase == "desc" ? "DESC" : "ASC"

      case field
      when "username"
        scope.order(Arel.sql("username #{dir}"))
      when "display_name"
        scope.order(Arel.sql("display_name #{dir} NULLS LAST, username ASC"))
      when "following"
        scope.order(Arel.sql("following #{dir}, username ASC"))
      when "follows_you"
        scope.order(Arel.sql("follows_you #{dir}, username ASC"))
      when "mutual"
        scope.order(Arel.sql("following #{dir}, follows_you #{dir}, username ASC"))
      when "can_message"
        scope.order(Arel.sql("can_message #{dir} NULLS LAST, username ASC"))
      when "last_synced_at"
        scope.order(Arel.sql("last_synced_at #{dir} NULLS LAST, username ASC"))
      when "last_active_at"
        scope.order(Arel.sql("last_active_at #{dir} NULLS LAST, username ASC"))
      else
        nil
      end
    end

    def normalize_page(raw_page)
      value = raw_page.to_i
      value.positive? ? value : 1
    end

    def normalize_per_page(raw_per_page)
      value = raw_per_page.to_i
      value = DEFAULT_PER_PAGE if value <= 0
      value.clamp(MIN_PER_PAGE, MAX_PER_PAGE)
    end
  end
end
-
module InstagramProfiles
  # Assembles the data bundle rendered on a profile's show page: post
  # counters, behavior-profile history state, latest analysis artifacts and
  # the mutual-friends list.
  class ShowSnapshotService
    AVAILABLE_TAGS = %w[personal_user friend female_friend male_friend relative page excluded automatic_reply].freeze

    def initialize(account:, profile:, mutual_limit: 36)
      @account = account
      @profile = profile
      @mutual_limit = mutual_limit.to_i
    end

    def call
      posts = profile.instagram_profile_posts
      total = posts.count
      deleted = deleted_posts_count_for(posts)
      analyzed = posts.where(ai_status: "analyzed").count
      history_state = history_build_state

      {
        profile_posts_total_count: total,
        deleted_posts_count: deleted,
        active_posts_count: [total - deleted, 0].max,
        analyzed_posts_count: analyzed,
        pending_posts_count: [total - analyzed, 0].max,
        messages_count: profile.instagram_messages.count,
        action_logs_count: profile.instagram_profile_action_logs.count,
        latest_analysis: profile.latest_analysis,
        latest_story_intelligence_event: latest_story_intelligence_event,
        available_tags: AVAILABLE_TAGS,
        history_build_state: history_state,
        history_ready: cast_boolean(history_state["ready"]),
        mutual_profiles: MutualFriendsResolver.new(account: account, profile: profile).call(limit: mutual_limit)
      }
    end

    private

    attr_reader :account, :profile, :mutual_limit

    # Pulls the "history_build" sub-hash out of the behavior profile
    # metadata, tolerating a missing record and non-hash metadata.
    def history_build_state
      meta = profile.instagram_profile_behavior_profile&.metadata
      return {} unless meta.is_a?(Hash)

      state = meta["history_build"]
      state.is_a?(Hash) ? state : {}
    end

    # Counts posts whose metadata flags them as deleted at the source.
    # Counted in Ruby because the flag lives inside the JSON metadata column.
    def deleted_posts_count_for(posts_scope)
      posts_scope
        .where.not(metadata: nil)
        .pluck(:metadata)
        .count { |meta| cast_boolean(meta.is_a?(Hash) ? meta["deleted_from_source"] : nil) }
    end

    # Newest story-archive event that actually carries intelligence data;
    # scans at most the 60 most recent candidates.
    def latest_story_intelligence_event
      profile.instagram_profile_events
             .where(kind: InstagramProfileEvent::STORY_ARCHIVE_EVENT_KINDS)
             .order(detected_at: :desc, id: :desc)
             .limit(60)
             .detect do |event|
        meta = event.metadata.is_a?(Hash) ? event.metadata : {}
        story_intelligence_available_for_snapshot?(metadata: meta)
      end
    end

    # True when any of the local-intelligence artifacts is present.
    def story_intelligence_available_for_snapshot?(metadata:)
      local = metadata["local_story_intelligence"]
      return true if local.is_a?(Hash) && local.present?
      return true if metadata["ocr_text"].to_s.present?

      %w[content_signals object_detections ocr_blocks scenes].any? do |key|
        Array(metadata[key]).any?
      end
    end

    def cast_boolean(value)
      ActiveModel::Type::Boolean.new.cast(value)
    end
  end
end
-
module InstagramProfiles
  # Serializes a page of profile events into the JSON payload shape that
  # Tabulator's remote pagination expects ({data:, last_page:, last_row:}).
  class TabulatorEventsPayloadBuilder
    def initialize(events:, total:, pages:, view_context:)
      @events = events
      @total = total
      @pages = pages
      @view_context = view_context
    end

    def call
      serialized = events.map { |event| serialize_event(event) }
      { data: serialized, last_page: pages, last_row: total }
    end

    private

    attr_reader :events, :total, :pages, :view_context

    # One Tabulator row per event, including media URLs when a blob is attached.
    def serialize_event(event)
      meta = event.metadata.is_a?(Hash) ? event.metadata : {}
      has_media = event.media.attached?

      row = {
        id: event.id,
        kind: event.kind,
        external_id: event.external_id,
        occurred_at: event.occurred_at&.iso8601,
        detected_at: event.detected_at&.iso8601,
        metadata_json: metadata_preview_json(meta)
      }
      row[:media_content_type] = has_media ? event.media.blob.content_type : nil
      row[:media_url] = has_media ? blob_path(event.media) : nil
      row[:media_download_url] = has_media ? blob_path(event.media, disposition: "attachment") : nil
      row[:media_preview_image_url] = media_preview_image_url(event: event, metadata: meta)
      row[:video_static_frame_only] = StoryArchive::MediaPreviewResolver.static_video_preview?(metadata: meta)
      row
    end

    # Prefers an explicitly stored preview image; falls back to a locally
    # generated video frame representation.
    def media_preview_image_url(event:, metadata:)
      preferred = StoryArchive::MediaPreviewResolver.preferred_preview_image_url(event: event, metadata: metadata)
      preferred.presence || local_video_preview_representation_url(event: event)
    end

    # Generates (and processes) a 640px preview for attached videos; nil for
    # non-videos or when preview generation fails for any reason.
    def local_video_preview_representation_url(event:)
      media = event.media
      return nil unless media.attached?
      return nil unless media.blob&.content_type.to_s.start_with?("video/")

      view_context.url_for(media.preview(resize_to_limit: [640, 640]).processed)
    rescue StandardError
      nil
    end

    # Truncates serialized metadata to keep the row payload small.
    def metadata_preview_json(raw_metadata)
      json = (raw_metadata || {}).to_json
      json.length > 1200 ? "#{json[0, 1200]}..." : json
    end

    def blob_path(attachment, disposition: nil)
      opts = { only_path: true }
      opts[:disposition] = disposition if disposition.present?
      Rails.application.routes.url_helpers.rails_blob_path(attachment, **opts)
    end
  end
end
-
module InstagramProfiles
  # Parses Tabulator-style request params (remote filters/sorters) plus a
  # few boolean helpers. Parsing is defensive: malformed input yields []/nil.
  class TabulatorParams
    def initialize(params:)
      @params = params
    end

    # Normalized header filters: [{field: String, value: Object}, ...].
    # Accepts a JSON string, an Array, or ActionController::Parameters.
    def filters
      raw = params[:filters].presence || params[:filter]
      return [] if raw.blank?

      entries =
        case raw
        when String
          JSON.parse(raw)
        when Array
          raw
        when ActionController::Parameters
          raw.to_unsafe_h.values
        else
          []
        end

      Array(entries).filter_map do |item|
        hash = item.respond_to?(:to_h) ? item.to_h : {}
        field = hash["field"].to_s
        next if field.blank?

        { field: field, value: hash["value"] }
      end
    rescue StandardError
      []
    end

    # Raw sorter hashes: [{"field" => ..., "dir" => ...}, ...].
    def sorters
      raw = params[:sorters].presence || params[:sort]
      return [] if raw.blank?

      case raw
      when String
        parsed = JSON.parse(raw)
        parsed.is_a?(Array) ? parsed : []
      when Array
        raw
      when ActionController::Parameters
        raw.to_unsafe_h.values
      else
        []
      end
    rescue StandardError
      []
    end

    # Three-state boolean: true / false / nil (blank or unrecognized input).
    def parse_tri_bool(value)
      normalized = value.to_s.downcase
      return nil if normalized.blank?
      return true if %w[true 1 yes].include?(normalized)
      return false if %w[false 0 no].include?(normalized)

      nil
    end

    # Casts params[key] with Rails' boolean semantics ("1", "true", etc.).
    def truthy?(key)
      ActiveModel::Type::Boolean.new.cast(params[key])
    end

    private

    attr_reader :params
  end
end
-
module InstagramProfiles
  # Serializes a page of profiles into Tabulator's remote-pagination payload.
  class TabulatorProfilesPayloadBuilder
    def initialize(profiles:, total:, pages:, view_context:)
      @profiles = profiles
      @total = total
      @pages = pages
      @view_context = view_context
    end

    def call
      rows = profiles.map { |profile| serialize_profile(profile) }
      { data: rows, last_page: pages, last_row: total }
    end

    private

    attr_reader :profiles, :total, :pages, :view_context

    # One Tabulator row per profile.
    def serialize_profile(profile)
      {
        id: profile.id,
        username: profile.username,
        display_name: profile.display_name,
        following: profile.following,
        follows_you: profile.follows_you,
        mutual: profile.mutual?,
        can_message: profile.can_message,
        restriction_reason: profile.restriction_reason,
        last_synced_at: profile.last_synced_at&.iso8601,
        last_active_at: profile.last_active_at&.iso8601,
        avatar_url: avatar_url_for(profile)
      }
    end

    # Attached avatar blob wins, then the remote profile_pic_url, then the
    # bundled default avatar asset.
    def avatar_url_for(profile)
      return Rails.application.routes.url_helpers.rails_blob_path(profile.avatar, only_path: true) if profile.avatar.attached?
      return profile.profile_pic_url if profile.profile_pic_url.present?

      view_context.asset_path("default_avatar.svg")
    end
  end
end
-
1
module Jobs
  # Extracts tenant context (account/profile/post ids, scope, display label)
  # from background-job arguments in their various wire formats: ActiveJob
  # argument arrays, SolidQueue argument hashes, and raw Sidekiq items.
  class ContextExtractor
    class << self
      # Context from an ActiveJob-style argument array/hash. Never raises:
      # any failure degrades to the "system" scope.
      def from_active_job_arguments(arguments)
        payload = normalize_arguments_payload(arguments)

        account_id = extract_int(payload, :instagram_account_id)
        profile_id = extract_int(payload, :instagram_profile_id)
        post_id = extract_int(payload, :instagram_profile_post_id)
        scope = scope_for(account_id: account_id, profile_id: profile_id)

        {
          instagram_account_id: account_id,
          instagram_profile_id: profile_id,
          instagram_profile_post_id: post_id,
          job_scope: scope,
          context_label: context_label(scope: scope, account_id: account_id, profile_id: profile_id)
        }
      rescue StandardError
        {
          instagram_account_id: nil,
          instagram_profile_id: nil,
          instagram_profile_post_id: nil,
          job_scope: "system",
          context_label: "System"
        }
      end

      # SolidQueue stores the ActiveJob payload under "arguments".
      def from_solid_queue_job_arguments(arguments)
        hash = arguments.is_a?(Hash) ? arguments : {}
        from_active_job_arguments(hash["arguments"] || hash[:arguments])
      end

      # Sidekiq items wrap the ActiveJob payload in args[0]["arguments"].
      def from_sidekiq_item(item)
        hash = item.is_a?(Hash) ? item : {}
        args = Array(hash["args"])
        wrapper = args.first
        return from_active_job_arguments(wrapper["arguments"]) if wrapper.is_a?(Hash) && wrapper["arguments"].present?

        from_active_job_arguments(args)
      end

      private

      # Narrowest scope wins: profile > account > system.
      def scope_for(account_id:, profile_id:)
        if profile_id.present?
          "profile"
        elsif account_id.present?
          "account"
        else
          "system"
        end
      end

      # Unwraps nested argument envelopes until a flat hash remains.
      def normalize_arguments_payload(arguments)
        first = Array(arguments).first
        return normalize_hash(first) if first.is_a?(Hash)

        hash = normalize_hash(arguments)
        nested = hash["arguments"] || hash[:arguments]
        return normalize_arguments_payload(nested) if nested.present?

        hash
      end

      def normalize_hash(value)
        value.respond_to?(:to_h) ? value.to_h : {}
      rescue StandardError
        {}
      end

      # Reads the key as either string or symbol; nil for blank/non-numeric.
      def extract_int(hash, key)
        value = hash[key.to_s] || hash[key.to_sym]
        return nil if value.blank?

        Integer(value)
      rescue StandardError
        nil
      end

      def context_label(scope:, account_id:, profile_id:)
        case scope
        when "profile" then "Profile ##{profile_id} (Account ##{account_id || '?'})"
        when "account" then "Account ##{account_id}"
        else "System"
        end
      end
    end
  end
end
-
require "json"

module Jobs
  # Re-enqueues failed background jobs, both manually (enqueue!) and via a
  # bounded automatic retry sweep (enqueue_automatic_retries!). Pipeline jobs
  # are only retried while their pipeline run/step is still non-terminal.
  class FailureRetry
    class RetryError < StandardError; end

    DEFAULT_AUTO_LIMIT = 20
    DEFAULT_AUTO_MAX_ATTEMPTS = 3
    DEFAULT_AUTO_COOLDOWN = 10.minutes

    # Maps analysis-pipeline job classes to their step name; nil means the
    # job belongs to the pipeline but has no per-step terminal state.
    PIPELINE_STEP_BY_JOB_CLASS = {
      "ProcessPostVisualAnalysisJob" => "visual",
      "ProcessPostFaceAnalysisJob" => "face",
      "ProcessPostOcrAnalysisJob" => "ocr",
      "ProcessPostVideoAnalysisJob" => "video",
      "ProcessPostMetadataTaggingJob" => "metadata",
      "FinalizePostAnalysisPipelineJob" => nil
    }.freeze

    class << self
      # Re-enqueues the job recorded in `failure` and broadcasts the change.
      # Raises RetryError when the failure is missing, is an auth failure,
      # is non-retryable, its job class is unknown, or the underlying
      # pipeline work is no longer actionable. Returns the enqueued job.
      def enqueue!(failure, source: "manual")
        raise RetryError, "Failure record is required" unless failure
        raise RetryError, "Authentication failures must not be retried" if failure.auth_failure?
        raise RetryError, "Failure is marked as non-retryable" unless failure.retryable_now?

        job_class = failure.job_class.to_s.safe_constantize
        raise RetryError, "Unknown job class: #{failure.job_class}" unless job_class

        payload = parse_arguments(failure.arguments_json)
        raise RetryError, "Failure is no longer actionable for retry" unless retry_actionable?(failure: failure, payload: payload)

        job = perform_later(job_class: job_class, payload: payload)
        mark_retry_enqueued!(failure: failure, source: source, job: job)

        Ops::LiveUpdateBroadcaster.broadcast!(
          topic: "jobs_changed",
          account_id: failure.instagram_account_id,
          payload: { action: "retry_enqueued", failed_job_id: failure.id, new_job_id: job.job_id },
          throttle_key: "jobs_changed",
          throttle_seconds: 0
        )

        job
      end

      # Scans recent retryable failures and enqueues up to `limit` retries,
      # skipping failures over `max_attempts` or still inside `cooldown`.
      # Returns {scanned:, enqueued:, skipped:, errors:} counters.
      def enqueue_automatic_retries!(limit: DEFAULT_AUTO_LIMIT, max_attempts: DEFAULT_AUTO_MAX_ATTEMPTS, cooldown: DEFAULT_AUTO_COOLDOWN)
        cap = limit.to_i.clamp(1, 200)
        attempts_cap = max_attempts.to_i.clamp(1, 10)
        cool_down = normalize_cooldown(cooldown)

        result = { scanned: 0, enqueued: 0, skipped: 0, errors: 0 }
        # Over-scan (5x) so skipped candidates don't starve the batch.
        each_retry_candidate(limit: cap * 5) do |failure|
          result[:scanned] += 1

          unless eligible_for_auto_retry?(failure: failure, max_attempts: attempts_cap, cooldown: cool_down)
            result[:skipped] += 1
            next
          end

          begin
            enqueue!(failure, source: "auto")
            result[:enqueued] += 1
          rescue StandardError => e # covers RetryError too; listing both was redundant
            mark_retry_error!(failure: failure, error: e)
            result[:errors] += 1
          end

          break if result[:enqueued] >= cap
        end

        Ops::StructuredLogger.info(
          event: "jobs.failure_retry.auto_batch",
          payload: result.merge(limit: cap, max_attempts: attempts_cap, cooldown_seconds: cool_down.to_i)
        )

        result
      end

      private

      # Parses stored arguments JSON into an array; [] on blank/bad input.
      def parse_arguments(raw)
        return [] if raw.blank?

        parsed = JSON.parse(raw)
        parsed.is_a?(Array) ? parsed : [parsed]
      rescue StandardError
        []
      end

      # Enqueues with keyword args when the payload is a single hash; falls
      # back to positional args if the job rejects keywords.
      def perform_later(job_class:, payload:)
        if payload.length == 1 && payload.first.is_a?(Hash)
          job_class.perform_later(**payload.first.deep_symbolize_keys)
        else
          job_class.perform_later(*payload)
        end
      rescue ArgumentError
        job_class.perform_later(*payload)
      end

      # Yields retryable, non-auth failures from the last 72h, newest first.
      def each_retry_candidate(limit:)
        scope = BackgroundJobFailure.where(retryable: true).where.not(failure_kind: "authentication")
        scope = scope.where("occurred_at >= ?", 72.hours.ago)
        scope.order(occurred_at: :desc, id: :desc).limit(limit).to_a.each do |failure|
          yield failure
        end
      end

      # Attempt-count, actionability and cooldown checks for the auto sweep.
      def eligible_for_auto_retry?(failure:, max_attempts:, cooldown:)
        state = retry_state_for(failure)
        attempts = state["attempts"].to_i
        return false if attempts >= max_attempts
        return false unless retry_actionable?(failure: failure)

        last_retry_at = parse_time(state["last_retry_at"])
        return true if last_retry_at.blank?

        last_retry_at <= cooldown.ago
      end

      # Retry bookkeeping hash stored under metadata["retry_state"].
      def retry_state_for(failure)
        metadata = failure.metadata.is_a?(Hash) ? failure.metadata : {}
        raw = metadata["retry_state"].is_a?(Hash) ? metadata["retry_state"] : {}
        raw.stringify_keys
      rescue StandardError
        {}
      end

      # Best-effort bookkeeping: bumps attempts and stamps the retry; uses
      # update_columns to skip validations/callbacks, and never raises.
      def mark_retry_enqueued!(failure:, source:, job:)
        metadata = failure.metadata.is_a?(Hash) ? failure.metadata.deep_dup : {}
        state = retry_state_for(failure)
        attempts = state["attempts"].to_i + 1
        state["attempts"] = attempts
        state["last_retry_at"] = Time.current.iso8601
        state["last_retry_job_id"] = job.job_id
        state["last_retry_source"] = source.to_s
        state["last_retry_error"] = nil

        metadata["retry_state"] = state
        failure.update_columns(metadata: metadata, updated_at: Time.current)
      rescue StandardError
        nil
      end

      # Best-effort: records the last retry error without raising.
      def mark_retry_error!(failure:, error:)
        metadata = failure.metadata.is_a?(Hash) ? failure.metadata.deep_dup : {}
        state = retry_state_for(failure)
        state["last_retry_error"] = "#{error.class}: #{error.message}"
        state["last_retry_attempted_at"] = Time.current.iso8601
        metadata["retry_state"] = state
        failure.update_columns(metadata: metadata, updated_at: Time.current)
      rescue StandardError
        nil
      end

      def parse_time(raw)
        return nil if raw.blank?

        Time.zone.parse(raw.to_s)
      rescue StandardError
        nil
      end

      # Coerces cooldown input to an ActiveSupport::Duration.
      def normalize_cooldown(value)
        return value if value.is_a?(ActiveSupport::Duration)

        value.to_i.seconds
      rescue StandardError
        DEFAULT_AUTO_COOLDOWN
      end

      # For pipeline jobs only: false when the pipeline run (or this job's
      # step) has already reached a terminal state. Errs on the side of
      # retrying (returns true) when state cannot be determined.
      def retry_actionable?(failure:, payload: nil)
        return true unless PIPELINE_STEP_BY_JOB_CLASS.key?(failure.job_class.to_s)

        args = pipeline_args(payload || parse_arguments(failure.arguments_json))
        return true unless args.present?

        pipeline_run_id = args["pipeline_run_id"].to_s
        return true if pipeline_run_id.blank?

        post = pipeline_post_from_args(args)
        return false unless post

        pipeline_state = Ai::PostAnalysisPipelineState.new(post: post)
        return false if pipeline_state.pipeline_terminal?(run_id: pipeline_run_id)

        step = PIPELINE_STEP_BY_JOB_CLASS[failure.job_class.to_s]
        return true if step.blank?

        !pipeline_state.step_terminal?(run_id: pipeline_run_id, step: step)
      rescue StandardError
        true
      end

      def pipeline_args(payload)
        return {} unless payload.is_a?(Array)

        first = payload.first
        return {} unless first.is_a?(Hash)

        first.stringify_keys
      end

      # Looks up the post referenced by the args, scoped to the profile and
      # account ids when present.
      def pipeline_post_from_args(args)
        post_id = args["instagram_profile_post_id"].to_i
        return nil if post_id <= 0

        profile_id = args["instagram_profile_id"].to_i
        account_id = args["instagram_account_id"].to_i

        scope = InstagramProfilePost.where(id: post_id)
        scope = scope.where(instagram_profile_id: profile_id) if profile_id.positive?
        scope = scope.where(instagram_account_id: account_id) if account_id.positive?
        scope.first
      end
    end
  end
end
-
require "json"
require "net/http"
require "uri"

module Messaging
  # Thin HTTP client for the official messaging API, configured entirely via
  # OFFICIAL_MESSAGING_API_URL / OFFICIAL_MESSAGING_API_TOKEN.
  class IntegrationService
    OPEN_TIMEOUT_SECONDS = 8
    READ_TIMEOUT_SECONDS = 20

    def initialize(api_url: ENV["OFFICIAL_MESSAGING_API_URL"], access_token: ENV["OFFICIAL_MESSAGING_API_TOKEN"])
      @api_url = api_url.to_s.strip
      @access_token = access_token.to_s
    end

    # True when both the endpoint URL and the token are present.
    def configured?
      @api_url.present? && @access_token.present?
    end

    # Sends a plain-text message to recipient_id.
    #
    # Returns { ok:, status:, provider_message_id: } on success.
    # Raises RuntimeError when unconfigured or on a non-2xx response.
    def send_text!(recipient_id:, text:, context: {})
      raise "Official messaging integration is not configured" unless configured?

      uri = URI.parse(@api_url)
      request = Net::HTTP::Post.new(uri.request_uri)
      request["Content-Type"] = "application/json"
      request["Authorization"] = "Bearer #{@access_token}"
      request.body = JSON.generate(
        recipient_id: recipient_id.to_s,
        message: text.to_s,
        context: context.to_h
      )

      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = (uri.scheme == "https")
      http.open_timeout = OPEN_TIMEOUT_SECONDS
      http.read_timeout = READ_TIMEOUT_SECONDS

      response = http.request(request)
      # Check success before parsing — the old code parsed even error bodies.
      raise "Official messaging API error: HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

      body = parse_json_body(response.body)
      {
        ok: true,
        status: response.code.to_i,
        provider_message_id: body["id"].to_s.presence || body["message_id"].to_s.presence
      }
    end

    private

    # Tolerant JSON parse: malformed or empty bodies become {}. Replaces the
    # old inline `rescue {}` modifier, which silently swallowed every
    # StandardError rather than just parse failures.
    def parse_json_body(raw)
      JSON.parse(raw.to_s.presence || "{}")
    rescue JSON::ParserError
      {}
    end
  end
end
-
module Ops
  # Builds the list of health issues for an Instagram account shown on the
  # ops dashboard. Each issue is { level: :bad | :warn, message: String }.
  class AccountIssues
    def self.for(account)
      issues = []
      cookie_auth_ok = account.cookie_authenticated?
      session_cookie_present = account.sessionid_cookie_present?

      if account.cookies.blank?
        issues << { level: :bad, message: "No cookies stored. Import cookies or run Manual Browser Login." }
      end

      if account.login_state.to_s != "authenticated" && !session_cookie_present
        issues << { level: :bad, message: "Login state is '#{account.login_state}'. Sync and messaging will likely fail." }
      end

      if account.user_agent.to_s.strip.blank? && !cookie_auth_ok
        issues << { level: :warn, message: "No user-agent saved. Manual login usually captures one; headless sessions can be less stable without it." }
      end

      snap = account.auth_snapshot
      captured_at = snap["captured_at"].to_s
      if captured_at.present?
        begin
          t = Time.iso8601(captured_at)
          # Only warn about stale bundles when cookies alone cannot authenticate.
          issues << { level: :warn, message: "Session bundle captured at #{t.strftime('%Y-%m-%d %H:%M:%S')} UTC." } if t < 30.days.ago && !cookie_auth_ok
        rescue StandardError
          issues << { level: :warn, message: "Auth snapshot captured_at is not parseable." } unless cookie_auth_ok
        end
      else
        issues << { level: :warn, message: "No auth snapshot captured yet." } unless cookie_auth_ok
      end

      if snap["ig_app_id"].to_s.strip.blank? && !cookie_auth_ok
        issues << { level: :warn, message: "No ig_app_id in auth snapshot. API fetches may rely on fallback headers." }
      end

      unless session_cookie_present
        issues << { level: :bad, message: "No sessionid cookie detected. Re-authenticate this account." }
      end

      issues
    end
  end
end
-
module Ops
  # Merges profile action logs and profile events into a single
  # reverse-chronological audit feed for an account's ops dashboard.
  class AuditLogBuilder
    class << self
      # Returns up to `limit` (clamped to 1..500) entries, newest first.
      # Each entry is a hash tagged with type: "action" or "event".
      def for_account(instagram_account:, limit: 120)
        account = instagram_account
        cap = limit.to_i.clamp(1, 500)

        action_logs =
          account.instagram_profile_action_logs
                 .includes(:instagram_profile)
                 .order(occurred_at: :desc, id: :desc)
                 .limit(cap)
                 .map do |log|
            metadata = log.metadata.is_a?(Hash) ? log.metadata : {}
            {
              type: "action",
              occurred_at: log.occurred_at || log.created_at,
              profile_id: log.instagram_profile&.id,
              profile_username: log.instagram_profile&.username,
              kind: log.action.to_s,
              status: log.status.to_s,
              detail: log.log_text.to_s.presence || log.error_message.to_s.presence || metadata.to_s.byteslice(0, 180),
              comment_text: metadata["comment_text"].to_s.presence || metadata["ai_reply_text"].to_s.presence || metadata["posted_comment"].to_s.presence
            }
          end

        events =
          InstagramProfileEvent
            .joins(:instagram_profile)
            .where(instagram_profiles: { instagram_account_id: account.id })
            .includes(:instagram_profile, media_attachment: :blob, preview_image_attachment: :blob)
            .order(detected_at: :desc, id: :desc)
            .limit(cap)
            .map do |event|
            metadata = event.metadata.is_a?(Hash) ? event.metadata : {}
            media_attached = event.media.attached?
            {
              type: "event",
              occurred_at: event.occurred_at || event.detected_at || event.created_at,
              profile_id: event.instagram_profile&.id,
              profile_username: event.instagram_profile&.username,
              kind: event.kind.to_s,
              status: "recorded",
              detail: metadata.to_s.byteslice(0, 180),
              comment_text: metadata["comment_text"].to_s.presence || metadata["ai_reply_text"].to_s.presence || metadata["posted_comment"].to_s.presence,
              media_attached: media_attached,
              media_url: media_attached ? Rails.application.routes.url_helpers.rails_blob_path(event.media, only_path: true) : nil,
              media_download_url: media_attached ? Rails.application.routes.url_helpers.rails_blob_path(event.media, disposition: "attachment", only_path: true) : nil,
              media_content_type: media_attached ? event.media.blob&.content_type.to_s : nil,
              media_preview_image_url: StoryArchive::MediaPreviewResolver.preferred_preview_image_url(event: event, metadata: metadata),
              video_static_frame_only: StoryArchive::MediaPreviewResolver.static_video_preview?(metadata: metadata)
            }
          end

        # Interleave both sources, newest first, and trim to the cap.
        # (Removed a dangling `private` marker that had no methods after it.)
        (action_logs + events)
          .sort_by { |entry| entry[:occurred_at] || Time.at(0) }
          .reverse
          .first(cap)
      end
    end
  end
end
-
require "digest"
-
-
module Ops
-
class IssueTracker
-
class << self
-
# Records (or refreshes) an AppIssue for a failed background job. Auth
# failures are escalated to a critical "authentication_required" issue.
def record_job_failure!(job:, exception:, context:, failure_record:)
  auth_failure = exception.is_a?(Instagram::AuthenticationRequiredError)

  upsert_issue!(
    issue_type: auth_failure ? "authentication_required" : "job_failure",
    source: job.class.name,
    severity: auth_failure ? "critical" : "error",
    title: issue_title_for(job: job, exception: exception),
    details: exception.message.to_s,
    instagram_account_id: context[:instagram_account_id],
    instagram_profile_id: context[:instagram_profile_id],
    background_job_failure_id: failure_record&.id,
    metadata: {
      queue_name: job.queue_name,
      active_job_id: job.job_id,
      provider_job_id: job.provider_job_id,
      error_class: exception.class.name
    },
    fingerprint: fingerprint_for_job_failure(job: job, exception: exception, context: context)
  )
end
-
-
# Records the AI microservice health state: resolves the standing issue when
# healthy, otherwise upserts a critical "ai_service_unavailable" issue.
# Fingerprint is computed once (it was previously duplicated in both
# branches), matching the record_queue_health! pattern.
def record_ai_service_check!(ok:, message:, metadata: {})
  fingerprint = fingerprint_for("ai_service_health", "AiDashboardController", nil, nil, "ai_microservice_offline")

  if ok
    resolve_by_fingerprint!(
      fingerprint: fingerprint,
      notes: "AI microservice healthy again."
    )
    return
  end

  upsert_issue!(
    issue_type: "ai_service_unavailable",
    source: "AiDashboardController",
    severity: "critical",
    title: "AI microservice unavailable",
    details: message.to_s,
    metadata: metadata,
    fingerprint: fingerprint
  )
end
-
-
# Records queue (Sidekiq) health: resolves the standing issue on recovery,
# otherwise upserts a critical "queue_health_degraded" issue.
def record_queue_health!(ok:, message:, metadata: {})
  fingerprint = fingerprint_for("queue_health", "Sidekiq", nil, nil, "workers_or_backlog")

  if ok
    resolve_by_fingerprint!(fingerprint: fingerprint, notes: "Queue health recovered.")
    return
  end

  upsert_issue!(
    fingerprint: fingerprint,
    issue_type: "queue_health_degraded",
    source: "Sidekiq",
    severity: "critical",
    title: "Queue processing degraded",
    details: message.to_s,
    metadata: metadata
  )
end
-
-
# Marks the issue matching `fingerprint` as resolved, if it exists and is
# not already resolved. Persistence errors are logged, never raised.
def resolve_by_fingerprint!(fingerprint:, notes: nil)
  issue = AppIssue.find_by(fingerprint: fingerprint.to_s)
  return if issue.nil? || issue.status == "resolved"

  issue.mark_resolved!(notes: notes)
rescue StandardError => e
  Rails.logger.warn("[ops.issue_tracker] resolve failed: #{e.class}: #{e.message}")
end
-
-
def upsert_issue!(issue_type:, source:, severity:, title:, details:, metadata: {}, fingerprint:, instagram_account_id: nil, instagram_profile_id: nil, background_job_failure_id: nil)
  # Creates or refreshes the AppIssue identified by +fingerprint+:
  # re-opens resolved issues, bumps the occurrence counter, and merges
  # metadata. Returns the issue, or nil (after logging) on any error so
  # issue tracking never breaks the caller.
  timestamp = Time.current
  issue = AppIssue.find_or_initialize_by(fingerprint: fingerprint.to_s)
  # Only persist associations that actually exist (and belong together).
  safe_account_id = validated_instagram_account_id(instagram_account_id)
  safe_profile_id = validated_instagram_profile_id(instagram_profile_id, instagram_account_id: safe_account_id)

  issue.issue_type = issue_type.to_s
  issue.source = source.to_s
  issue.severity = normalize_severity(severity)
  issue.title = title.to_s
  issue.details = details.to_s
  issue.instagram_account_id = safe_account_id
  issue.instagram_profile_id = safe_profile_id
  issue.background_job_failure_id = background_job_failure_id
  issue.metadata = (issue.metadata || {}).merge(metadata.to_h)
  issue.first_seen_at ||= timestamp
  issue.last_seen_at = timestamp
  issue.occurrences = issue.new_record? ? 1 : issue.occurrences.to_i + 1
  issue.status = "open" # re-open if it had been resolved
  issue.resolved_at = nil
  issue.save!
  issue
rescue StandardError => e
  Rails.logger.warn("[ops.issue_tracker] upsert failed: #{e.class}: #{e.message}")
  nil
end
-
-
private
-
-
def issue_title_for(job:, exception:)
  # Authentication failures get a distinct, actionable title.
  prefix =
    if exception.is_a?(Instagram::AuthenticationRequiredError)
      "Authentication required for"
    else
      "Job failure in"
    end
  "#{prefix} #{job.class.name}"
end
-
-
def validated_instagram_account_id(raw_id)
  # Returns the id only when a matching InstagramAccount row exists;
  # nil for blank/non-positive input or on any lookup error.
  candidate = raw_id.to_i
  return nil if candidate <= 0

  InstagramAccount.where(id: candidate).pick(:id)
rescue StandardError
  nil
end
-
-
def validated_instagram_profile_id(raw_id, instagram_account_id:)
  # Validates the profile id, additionally constraining it to the given
  # account when one is supplied. Returns nil when no match is found or
  # the lookup fails.
  candidate = raw_id.to_i
  return nil if candidate <= 0

  relation = InstagramProfile.where(id: candidate)
  relation = relation.where(instagram_account_id: instagram_account_id) if instagram_account_id.present?
  relation.pick(:id)
rescue StandardError
  nil
end
-
-
def normalize_severity(value)
  # Coerce unknown severities to the safe default "error".
  candidate = value.to_s
  return candidate if AppIssue::SEVERITIES.include?(candidate)

  "error"
end
-
-
def fingerprint_for_job_failure(job:, exception:, context:)
  # Auth errors collapse onto one stable key; every other error is keyed
  # by its normalized (digit/hex-scrubbed) message, so retries of the
  # same failure dedupe into a single issue.
  key =
    if exception.is_a?(Instagram::AuthenticationRequiredError)
      "authentication_required"
    else
      normalized_error_message(exception.message.to_s)
    end

  fingerprint_for("job_failure", job.class.name, context[:instagram_account_id], context[:instagram_profile_id], key)
end
-
-
def fingerprint_for(issue_type, source, account_id, profile_id, key)
  # Stable SHA-256 identity for an issue. Nil parts stringify to "", so
  # the same logical issue always yields the same fingerprint.
  parts = [issue_type, source, account_id, profile_id, key].map(&:to_s)
  Digest::SHA256.hexdigest(parts.join("|"))
end
-
-
def normalized_error_message(msg)
  # Scrub volatile tokens so messages differing only by ids fingerprint
  # identically: long digit runs first, then long hex runs, then cap the
  # length. (Order matters: "<n>" contains no 8+ char hex run.)
  scrubbed = msg.to_s.gsub(/\b\d{2,}\b/, "<n>")
  scrubbed = scrubbed.gsub(/[0-9a-f]{8,}/i, "<hex>")
  scrubbed.truncate(180)
end
-
end
-
end
-
end
-
1
module Ops
  # Broadcasts operational live-update messages over ActionCable, with a
  # small cache-backed throttle to avoid flooding subscribers.
  class LiveUpdateBroadcaster
    THROTTLE_CACHE_PREFIX = "ops:live_update:throttle".freeze

    class << self
      # Stream carrying updates relevant to every account.
      def global_stream
        "operations:global"
      end

      # Per-account stream name.
      def account_stream(account_id)
        "operations:account:#{account_id}"
      end

      # Publishes +payload+ under +topic+. Broadcasts to the account
      # stream when account_id is positive, and to the global stream when
      # include_global casts truthy (or no account is targeted). Errors
      # are logged and swallowed.
      def broadcast!(topic:, account_id: nil, payload: {}, throttle_key: nil, throttle_seconds: 0.8, include_global: nil)
        return if throttled?(topic: topic, account_id: account_id, throttle_key: throttle_key, throttle_seconds: throttle_seconds)

        resolved_account_id = account_id.to_i
        send_global =
          if include_global.nil?
            resolved_account_id <= 0
          else
            ActiveModel::Type::Boolean.new.cast(include_global)
          end
        message = base_message(topic: topic, payload: payload)

        ActionCable.server.broadcast(global_stream, message) if send_global
        ActionCable.server.broadcast(account_stream(resolved_account_id), message) if resolved_account_id.positive?
      rescue StandardError => e
        Rails.logger.warn("[ops.live_update] broadcast failed: #{e.class}: #{e.message}")
      end

      private

      # Envelope shared by every broadcast; non-hash payloads are dropped.
      def base_message(topic:, payload:)
        {
          topic: topic.to_s,
          sent_at: Time.current.iso8601(3),
          payload: payload.is_a?(Hash) ? payload : {}
        }
      end

      # True when a message for the same cache key was sent within the
      # throttle window. Fails open (false) on any cache error.
      def throttled?(topic:, account_id:, throttle_key:, throttle_seconds:)
        window = throttle_seconds.to_f
        return false if window <= 0

        key = cache_key(topic: topic, account_id: account_id, throttle_key: throttle_key)
        return true if Rails.cache.read(key)

        Rails.cache.write(key, true, expires_in: window.seconds)
        false
      rescue StandardError
        false
      end

      def cache_key(topic:, account_id:, throttle_key:)
        "#{THROTTLE_CACHE_PREFIX}:#{account_id.to_i}:#{throttle_key.presence || topic.to_s}"
      end
    end
  end
end
-
module Ops
  # Cached health status for the local AI stack (microservice + Ollama).
  # Successful probes are cached longer than failures so recovery is
  # noticed quickly.
  class LocalAiHealth
    CACHE_KEY = "ops:local_ai_health:v1".freeze
    CACHE_TTL = ENV.fetch("AI_HEALTH_CACHE_TTL_SECONDS", "900").to_i.seconds
    FAILURE_CACHE_TTL = ENV.fetch("AI_HEALTH_FAILURE_CACHE_TTL_SECONDS", "60").to_i.seconds
    STALE_AFTER = ENV.fetch("AI_HEALTH_STALE_AFTER_SECONDS", "240").to_i.seconds

    class << self
      # Cached status only; never triggers a live probe.
      def status
        cached = Rails.cache.read(CACHE_KEY)
        return missing_status if cached.blank?

        annotate_status(cached, source: "cache")
      end

      # Returns the cached status unless +force+ is set — or the cached
      # entry is stale and refresh_if_stale requests a re-probe — in
      # which case the services are checked live.
      def check(force: false, refresh_if_stale: false)
        cached = Rails.cache.read(CACHE_KEY)
        if cached.present? && !force
          annotated = annotate_status(cached, source: "cache")
          return annotated unless refresh_if_stale && annotated[:stale]
        end

        perform_live_check
      end

      private

      # Probes both services, caches the outcome, and records usage
      # metrics. A raised error is cached as a failure with a short TTL.
      def perform_live_check
        started_at = monotonic_started_at
        checked_at = Time.current

        microservice = Ai::LocalMicroserviceClient.new.test_connection!
        ollama = Ai::OllamaClient.new.test_connection!

        bool = ActiveModel::Type::Boolean.new
        result = {
          ok: bool.cast(microservice[:ok]) && bool.cast(ollama[:ok]),
          checked_at: checked_at.iso8601(3),
          details: {
            microservice: microservice,
            ollama: ollama
          }
        }

        Rails.cache.write(CACHE_KEY, result, expires_in: CACHE_TTL)
        track_healthcheck_metrics(result: result, started_at: started_at)

        annotate_status(result, source: "live")
      rescue StandardError => e
        failure = {
          ok: false,
          checked_at: Time.current.iso8601(3),
          error: e.message.to_s,
          error_class: e.class.name
        }

        Rails.cache.write(CACHE_KEY, failure, expires_in: FAILURE_CACHE_TTL)

        Ai::ApiUsageTracker.track_failure(
          provider: "local_ai_stack",
          operation: "health_check",
          category: "healthcheck",
          started_at: started_at,
          error: "#{e.class}: #{e.message}",
          metadata: failure
        )

        annotate_status(failure, source: "live")
      end

      # Records the probe as a success or failure API-usage event.
      def track_healthcheck_metrics(result:, started_at:)
        shared = {
          provider: "local_ai_stack",
          operation: "health_check",
          category: "healthcheck",
          started_at: started_at,
          metadata: result[:details]
        }

        if ActiveModel::Type::Boolean.new.cast(result[:ok])
          Ai::ApiUsageTracker.track_success(**shared)
        else
          Ai::ApiUsageTracker.track_failure(error: "One or more local AI components are unavailable", **shared)
        end
      end

      # Adds :stale and :source bookkeeping to a cached/live payload.
      def annotate_status(payload, source:)
        row = payload.is_a?(Hash) ? payload.deep_symbolize_keys : {}
        checked_at_text = row[:checked_at].to_s
        checked_at = parse_timestamp(checked_at_text)

        row.merge(
          checked_at: checked_at_text.presence,
          stale: checked_at.nil? || checked_at < STALE_AFTER.ago,
          source: source.to_s
        )
      end

      def parse_timestamp(value)
        text = value.to_s.strip
        return nil if text.blank?

        Time.iso8601(text)
      rescue StandardError
        nil
      end

      # Placeholder status when nothing has been cached yet.
      def missing_status
        {
          ok: false,
          checked_at: nil,
          stale: true,
          source: "missing_cache",
          error: "No cached AI health status is available yet."
        }
      end

      # Monotonic clock for latency measurement; wall clock as fallback.
      def monotonic_started_at
        Process.clock_gettime(Process::CLOCK_MONOTONIC)
      rescue StandardError
        Time.current.to_f
      end
    end
  end
end
-
require "timeout"
-
-
module Ops
  # Re-runs local story-intelligence enrichment over recent story-archive
  # events, optionally queueing LLM comment regeneration for events whose
  # existing comment is missing, failed, or from an older pipeline.
  class LocalStoryIntelligenceBackfill
    DEFAULT_LIMIT = 100
    EVENT_TIMEOUT_SECONDS = 45

    def initialize(account_id: nil, limit: nil, enqueue_comments: false)
      @account_id = account_id.to_s.presence
      @limit = limit.to_i.positive? ? limit.to_i : DEFAULT_LIMIT
      @enqueue_comments = ActiveModel::Type::Boolean.new.cast(enqueue_comments)
    end

    # Enriches up to @limit media-bearing story events. Returns a counters
    # hash (scanned/enriched/empty/queued/errors plus per-reason tallies
    # for blank payloads, most common first).
    def backfill!
      stats = {
        scanned: 0,
        enriched: 0,
        empty: 0,
        queued: 0,
        errors: 0,
        reasons: Hash.new(0)
      }

      story_event_scope.each do |story_event|
        break if stats[:scanned] >= @limit
        next unless story_event.media.attached?

        stats[:scanned] += 1
        payload = with_event_timeout(event: story_event) { story_event.send(:local_story_intelligence_payload) }
        next unless payload.is_a?(Hash)

        if story_event.send(:local_story_intelligence_blank?, payload)
          stats[:empty] += 1
          stats[:reasons][payload[:reason].to_s.presence || "local_story_intelligence_blank"] += 1
          next
        end

        story_event.send(:persist_local_story_intelligence!, payload)
        stats[:enriched] += 1

        next unless @enqueue_comments
        next unless regeneration_candidate?(story_event)

        stats[:queued] += 1 if enqueue_comment_job(story_event, requested_by: "local_story_intelligence_backfill")
      rescue StandardError => e
        # Per-event rescue: one bad event must not abort the whole batch.
        stats[:errors] += 1
        Ops::StructuredLogger.warn(
          event: "story_intelligence.backfill.error",
          payload: {
            event_id: story_event&.id,
            error_class: e.class.name,
            error_message: e.message
          }
        )
      end

      stats[:reasons] = stats[:reasons].sort_by { |_reason, count| -count }.to_h
      log_batch(event: "story_intelligence.backfill.completed", result: stats)
      stats
    end

    # Re-enqueues LLM comment generation for stale/failed comments after
    # refreshing local intelligence. Returns a counters hash.
    def requeue_generation!
      stats = {
        scanned: 0,
        queued: 0,
        skipped_no_context: 0,
        skipped_in_progress: 0,
        skipped_not_needed: 0,
        errors: 0
      }

      story_event_scope.each do |story_event|
        break if stats[:scanned] >= @limit
        next unless story_event.media.attached?

        stats[:scanned] += 1

        if story_event.llm_comment_in_progress?
          stats[:skipped_in_progress] += 1
          next
        end

        unless regeneration_candidate?(story_event)
          stats[:skipped_not_needed] += 1
          next
        end

        payload = with_event_timeout(event: story_event) { story_event.send(:local_story_intelligence_payload) }
        next unless payload.is_a?(Hash)

        if story_event.send(:local_story_intelligence_blank?, payload)
          stats[:skipped_no_context] += 1
          next
        end

        story_event.send(:persist_local_story_intelligence!, payload)
        stats[:queued] += 1 if enqueue_comment_job(story_event, requested_by: "local_story_intelligence_requeue")
      rescue StandardError => e
        stats[:errors] += 1
        Ops::StructuredLogger.warn(
          event: "story_intelligence.requeue.error",
          payload: {
            event_id: story_event&.id,
            error_class: e.class.name,
            error_message: e.message
          }
        )
      end

      log_batch(event: "story_intelligence.requeue.completed", result: stats)
      stats
    end

    private

    # Story-archive events, newest first, optionally scoped to one account.
    def story_event_scope
      scope = InstagramProfileEvent
        .where(kind: InstagramProfileEvent::STORY_ARCHIVE_EVENT_KINDS)
        .includes(:instagram_profile)
        .order(detected_at: :desc, id: :desc)

      return scope if @account_id.blank?

      scope.joins(:instagram_profile).where(instagram_profiles: { instagram_account_id: @account_id })
    end

    # A comment should be regenerated when it failed, is missing, came
    # from the fallback path, or was produced by an older pipeline.
    def regeneration_candidate?(event)
      metadata = event.llm_comment_metadata.is_a?(Hash) ? event.llm_comment_metadata : {}
      source = metadata["source"].to_s
      pipeline = metadata["pipeline"].to_s

      event.llm_comment_status.to_s == "failed" ||
        event.llm_generated_comment.to_s.blank? ||
        source == "fallback" ||
        (pipeline.present? && pipeline != "local_story_intelligence_v2")
    end

    # Enqueues GenerateLlmCommentJob and records the job id on the event.
    # Returns true on success, false (after logging) on any error.
    def enqueue_comment_job(event, requested_by:)
      job = GenerateLlmCommentJob.perform_later(
        instagram_profile_event_id: event.id,
        provider: "local",
        requested_by: requested_by
      )
      event.queue_llm_comment_generation!(job_id: job.job_id)
      true
    rescue StandardError => e
      Ops::StructuredLogger.warn(
        event: "story_intelligence.comment_enqueue.error",
        payload: {
          event_id: event.id,
          requested_by: requested_by,
          error_class: e.class.name,
          error_message: e.message
        }
      )
      false
    end

    # Structured summary line for a completed batch.
    def log_batch(event:, result:)
      Ops::StructuredLogger.info(
        event: event,
        payload: {
          account_id: @account_id,
          limit: @limit
        }.merge(result.except(:reasons)).merge(reasons: result[:reasons])
      )
    end

    # Runs the block under a hard per-event timeout; logs and returns nil
    # when the deadline expires.
    def with_event_timeout(event:, &block)
      Timeout.timeout(EVENT_TIMEOUT_SECONDS, &block)
    rescue Timeout::Error
      Ops::StructuredLogger.warn(
        event: "story_intelligence.event_timeout",
        payload: {
          event_id: event&.id,
          timeout_seconds: EVENT_TIMEOUT_SECONDS
        }
      )
      nil
    end
  end
end
-
module Ops
  # Live operational metrics for the admin dashboards: a system-wide
  # snapshot, a per-account snapshot, and queue counters for whichever
  # ActiveJob backend is configured.
  class Metrics
    API_USAGE_WINDOW = 24.hours

    # System-wide snapshot: queue state, application row counts, AI API
    # usage over the last 24h, and a visual-analysis failure summary.
    def self.system
      usage_scope = AiApiCall.where(occurred_at: API_USAGE_WINDOW.ago..Time.current)
      visual_failures = BackgroundJobFailure.where(job_class: "ProcessPostVisualAnalysisJob")
                                            .where("occurred_at >= ?", 24.hours.ago)

      {
        queue: queue_counts,
        app: {
          accounts: InstagramAccount.count,
          continuous_processing_enabled_accounts: InstagramAccount.where(continuous_processing_enabled: true).count,
          continuous_processing_running_accounts: InstagramAccount.where(continuous_processing_state: "running").count,
          continuous_processing_backoff_accounts: InstagramAccount.where("continuous_processing_retry_after_at > ?", Time.current).count,
          profiles: InstagramProfile.count,
          messages: InstagramMessage.count,
          profile_events: InstagramProfileEvent.count,
          ai_analyses: AiAnalysis.count,
          ai_api_calls: AiApiCall.count,
          posts: InstagramPost.count,
          sync_runs: SyncRun.count,
          failures_24h: BackgroundJobFailure.where("occurred_at >= ?", 24.hours.ago).count,
          visual_analysis_failures_24h: visual_failures.count,
          auth_failures_24h: BackgroundJobFailure.where(failure_kind: "authentication").where("occurred_at >= ?", 24.hours.ago).count,
          active_issues: AppIssue.where.not(status: "resolved").count,
          storage_ingestions_24h: ActiveStorageIngestion.where("created_at >= ?", 24.hours.ago).count,
          continuous_processing_runs_24h: SyncRun.where(kind: "continuous_processing").where("created_at >= ?", 24.hours.ago).count
        },
        api_usage_24h: api_usage_summary(scope: usage_scope),
        visual_failures_24h: visual_failure_summary(scope: visual_failures)
      }
    end

    # Per-account snapshot mirroring .system but scoped to one account.
    def self.for_account(account)
      usage_scope = AiApiCall.where(instagram_account_id: account.id, occurred_at: API_USAGE_WINDOW.ago..Time.current)
      visual_failures = BackgroundJobFailure.where(instagram_account_id: account.id, job_class: "ProcessPostVisualAnalysisJob")
                                            .where("occurred_at >= ?", 24.hours.ago)

      {
        app: {
          profiles: account.instagram_profiles.count,
          mutuals: account.instagram_profiles.where(following: true, follows_you: true).count,
          following: account.instagram_profiles.where(following: true).count,
          followers: account.instagram_profiles.where(follows_you: true).count,
          messages: account.instagram_messages.count,
          profile_events: InstagramProfileEvent.joins(:instagram_profile).where(instagram_profiles: { instagram_account_id: account.id }).count,
          ai_analyses: account.ai_analyses.count,
          ai_api_calls: account.ai_api_calls.count,
          posts: account.instagram_posts.count,
          sync_runs: account.sync_runs.count,
          failures_24h: BackgroundJobFailure.where(instagram_account_id: account.id).where("occurred_at >= ?", 24.hours.ago).count,
          visual_analysis_failures_24h: visual_failures.count,
          auth_failures_24h: BackgroundJobFailure.where(instagram_account_id: account.id, failure_kind: "authentication").where("occurred_at >= ?", 24.hours.ago).count,
          active_issues: account.app_issues.where.not(status: "resolved").count,
          storage_ingestions_24h: account.active_storage_ingestions.where("created_at >= ?", 24.hours.ago).count,
          continuous_processing_state: account.continuous_processing_state,
          continuous_processing_failure_count: account.continuous_processing_failure_count.to_i,
          continuous_processing_backoff_active: account.continuous_processing_backoff_active?,
          continuous_processing_runs_24h: account.sync_runs.where(kind: "continuous_processing").where("created_at >= ?", 24.hours.ago).count
        },
        sync_runs_by_status: account.sync_runs.group(:status).count,
        analyses_by_status: account.ai_analyses.group(:status).count,
        api_usage_24h: api_usage_summary(scope: usage_scope),
        visual_failures_24h: visual_failure_summary(scope: visual_failures),
        queue: queue_counts
      }
    end

    # Queue counters for the active backend.
    def self.queue_counts
      sidekiq_backend? ? sidekiq_counts : solid_queue_counts
    end

    # Sidekiq counters; zeroed fallback when the Sidekiq API is
    # unreachable (e.g. Redis down).
    def self.sidekiq_counts
      require "sidekiq/api"

      queue_rows = Sidekiq::Queue.all.map { |queue| { name: queue.name, size: queue.size } }

      {
        backend: "sidekiq",
        enqueued: queue_rows.sum { |row| row[:size].to_i },
        scheduled: Sidekiq::ScheduledSet.new.size,
        retries: Sidekiq::RetrySet.new.size,
        dead: Sidekiq::DeadSet.new.size,
        processes: Sidekiq::ProcessSet.new.size,
        queues: queue_rows
      }
    rescue StandardError
      { backend: "sidekiq", enqueued: 0, scheduled: 0, retries: 0, dead: 0, processes: 0, queues: [] }
    end

    def self.solid_queue_counts
      {
        backend: "solid_queue",
        ready: safe_count { SolidQueue::ReadyExecution.count },
        scheduled: safe_count { SolidQueue::ScheduledExecution.count },
        claimed: safe_count { SolidQueue::ClaimedExecution.count },
        blocked: safe_count { SolidQueue::BlockedExecution.count },
        failed: safe_count { SolidQueue::FailedExecution.count },
        processes: safe_count { SolidQueue::Process.count }
      }
    end

    def self.sidekiq_backend?
      Rails.application.config.active_job.queue_adapter.to_s == "sidekiq"
    rescue StandardError
      false
    end

    # Yields, returning 0 on any error (missing tables, adapter absent).
    def self.safe_count
      yield
    rescue StandardError
      0
    end

    # Rollups of AI API calls within +scope+: totals, failures, token
    # usage, latency, and the top-10 operations by call count.
    def self.api_usage_summary(scope:)
      by_category = scope.group(:category).count.transform_keys(&:to_s)
      by_provider = scope.group(:provider).count.transform_keys(&:to_s)
      by_status = scope.group(:status).count.transform_keys(&:to_s)
      top_operations = scope.group(:operation).count.transform_keys(&:to_s)
                            .sort_by { |_operation, count| -count.to_i }
                            .first(10)
                            .to_h

      {
        total_calls: scope.count,
        failed_calls: by_status["failed"].to_i,
        image_analysis_calls: by_category["image_analysis"].to_i,
        image_analysis_failures: scope.where(category: "image_analysis", status: "failed").count,
        report_generation_calls: by_category["report_generation"].to_i,
        text_generation_calls: by_category["text_generation"].to_i,
        total_tokens: scope.sum(:total_tokens).to_i,
        avg_latency_ms: scope.where.not(latency_ms: nil).average(:latency_ms)&.round(1),
        by_category: by_category,
        by_provider: by_provider,
        by_status: by_status,
        top_operations: top_operations
      }
    end

    # Top-5 error class/message pairs among visual-analysis failures.
    # Returns an empty summary on any query error.
    def self.visual_failure_summary(scope:)
      top_errors = scope.group(:error_class, :error_message)
                        .count
                        .sort_by { |_key, count| -count.to_i }
                        .first(5)
                        .map do |(error_class, error_message), count|
        {
          error_class: error_class.to_s,
          error_message: error_message.to_s.byteslice(0, 180),
          count: count.to_i
        }
      end

      { total_failures: scope.count, by_error: top_errors }
    rescue StandardError
      { total_failures: 0, by_error: [] }
    end
  end
end
-
module Ops
  # Health check for background job processing. Only Sidekiq gets the
  # full backlog/worker analysis; any other backend reports healthy.
  class QueueHealth
    STUCK_BACKLOG_THRESHOLD = 1

    # Runs the check, records/clears the queue-health AppIssue, and
    # returns a structured result hash. Never raises.
    def self.check!
      counts = Ops::Metrics.queue_counts
      backend = counts[:backend].to_s
      return { ok: true, backend: backend } unless backend == "sidekiq"

      backlog = counts[:enqueued].to_i + counts[:scheduled].to_i + counts[:retries].to_i
      workers = counts[:processes].to_i

      # Jobs waiting with zero worker processes means processing is stuck.
      if workers.zero? && backlog >= STUCK_BACKLOG_THRESHOLD
        message = "No Sidekiq workers detected while queue backlog is present."
        Ops::IssueTracker.record_queue_health!(
          ok: false,
          message: message,
          metadata: counts
        )
        Ops::StructuredLogger.error(event: "queue.health.failed", payload: counts.merge(message: message))
        return { ok: false, reason: "no_workers_with_backlog", counts: counts }
      end

      # Dead jobs are worth surfacing but do not flip the health state.
      if counts[:dead].to_i.positive?
        Ops::StructuredLogger.warn(
          event: "queue.health.dead_jobs_present",
          payload: counts
        )
      end

      Ops::IssueTracker.record_queue_health!(
        ok: true,
        message: "Sidekiq queue healthy.",
        metadata: counts
      )
      { ok: true, counts: counts }
    rescue StandardError => e
      Ops::StructuredLogger.error(
        event: "queue.health.check_failed",
        payload: { error_class: e.class.name, error_message: e.message }
      )
      { ok: false, reason: "check_failed", error_class: e.class.name, error_message: e.message }
    end
  end
end
-
require "etc"
-
-
module Ops
  # Admission control for AI background work: refuses new tasks while the
  # host is under CPU/memory pressure or the target queue is too deep.
  # Thresholds are tunable via environment variables; host readings come
  # from /proc, so non-Linux hosts report zeros (and thus never throttle
  # on load/memory).
  class ResourceGuard
    DEFAULT_MAX_LOAD_PER_CORE = ENV.fetch("AI_MAX_LOAD_PER_CORE", "1.20").to_f
    DEFAULT_MIN_AVAILABLE_MEMORY_MB = ENV.fetch("AI_MIN_AVAILABLE_MEMORY_MB", "700").to_i
    DEFAULT_MAX_QUEUE_DEPTH = ENV.fetch("AI_MAX_QUEUE_DEPTH", "220").to_i
    DEFAULT_RETRY_SECONDS = ENV.fetch("AI_RESOURCE_RETRY_SECONDS", "20").to_i

    class << self
      # Decides whether +task+ may run now; critical tasks always pass.
      # Returns {allow:, reason:, retry_in_seconds:, snapshot:, task:}.
      # Fails open (allow: true) when the guard itself errors.
      def allow_ai_task?(task:, queue_name:, critical: false)
        current = snapshot(queue_name: queue_name)

        if overloaded?(snapshot: current) && !ActiveModel::Type::Boolean.new.cast(critical)
          return {
            allow: false,
            reason: reason_for(snapshot: current),
            retry_in_seconds: retry_seconds_for(snapshot: current),
            snapshot: current,
            task: task.to_s
          }
        end

        {
          allow: true,
          reason: nil,
          retry_in_seconds: nil,
          snapshot: current,
          task: task.to_s
        }
      rescue StandardError => e
        {
          allow: true,
          reason: "resource_guard_error:#{e.class}",
          retry_in_seconds: nil,
          snapshot: { error: e.message.to_s },
          task: task.to_s
        }
      end

      # Point-in-time view of host and queue pressure.
      def snapshot(queue_name: nil)
        {
          queue_name: queue_name.to_s,
          queue_depth: queue_depth_for(queue_name: queue_name),
          load_average_1m: load_average_1m,
          load_per_core: load_per_core,
          cpu_cores: cpu_cores,
          available_memory_mb: available_memory_mb,
          checked_at: Time.current.iso8601(3)
        }
      end

      private

      def overloaded?(snapshot:)
        snapshot[:load_per_core].to_f > DEFAULT_MAX_LOAD_PER_CORE ||
          snapshot[:available_memory_mb].to_i < DEFAULT_MIN_AVAILABLE_MEMORY_MB ||
          snapshot[:queue_depth].to_i > DEFAULT_MAX_QUEUE_DEPTH
      end

      # Reported in priority order: queue depth, then CPU, then memory.
      def reason_for(snapshot:)
        return "high_queue_depth" if snapshot[:queue_depth].to_i > DEFAULT_MAX_QUEUE_DEPTH
        return "high_cpu_load" if snapshot[:load_per_core].to_f > DEFAULT_MAX_LOAD_PER_CORE
        return "low_available_memory" if snapshot[:available_memory_mb].to_i < DEFAULT_MIN_AVAILABLE_MEMORY_MB

        "resource_pressure"
      end

      # Memory pressure waits longest, CPU next, queue depth the least.
      def retry_seconds_for(snapshot:)
        case reason_for(snapshot: snapshot)
        when "high_cpu_load" then DEFAULT_RETRY_SECONDS + 10
        when "low_available_memory" then DEFAULT_RETRY_SECONDS + 20
        else DEFAULT_RETRY_SECONDS
        end
      end

      # Depth of the named Sidekiq queue; 0 for blank names, non-Sidekiq
      # backends, or any Sidekiq API error.
      def queue_depth_for(queue_name:)
        name = queue_name.to_s
        return 0 if name.blank?
        return 0 unless sidekiq_backend?

        require "sidekiq/api"

        Sidekiq::Queue.new(name).size
      rescue StandardError
        0
      end

      def sidekiq_backend?
        Rails.application.config.active_job.queue_adapter.to_s == "sidekiq"
      rescue StandardError
        false
      end

      # 1-minute load average from /proc/loadavg (Linux); 0.0 elsewhere.
      def load_average_1m
        File.read("/proc/loadavg").to_s.split.first.to_f
      rescue StandardError
        0.0
      end

      def cpu_cores
        cores = Etc.nprocessors.to_i
        cores.positive? ? cores : 1
      rescue StandardError
        1
      end

      def load_per_core
        load_average_1m.to_f / cpu_cores.to_f
      rescue StandardError
        load_average_1m.to_f
      end

      # MemAvailable from /proc/meminfo, in MB; 0 when unreadable.
      def available_memory_mb
        row = File.readlines("/proc/meminfo").find { |line| line.start_with?("MemAvailable:") }
        return 0 unless row

        (row.split[1].to_i / 1024.0).round
      rescue StandardError
        0
      end
    end
  end
end
-
1
require "json"
-
-
1
module Ops
  # Emits single-line JSON log entries ({ts, event, pid, ...payload})
  # through Rails.logger. Every failure is swallowed: logging must never
  # break the caller.
  class StructuredLogger
    class << self
      def info(event:, payload: {})
        write(level: :info, event: event, payload: payload)
      end

      def warn(event:, payload: {})
        write(level: :warn, event: event, payload: payload)
      end

      def error(event:, payload: {})
        write(level: :error, event: event, payload: payload)
      end

      # Serializes and writes one entry at +level+, falling back to :info
      # when the logger lacks that level. Returns nil on any error.
      def write(level:, event:, payload: {})
        logger = Rails.logger
        severity = logger.respond_to?(level) ? level : :info
        logger.public_send(severity, serialize(event: event, payload: payload))
      rescue StandardError
        nil
      end

      private

      # Builds the JSON line; non-hash payloads become {message: ...} and
      # nil payload values are dropped.
      def serialize(event:, payload: {})
        entry = {
          ts: Time.current.iso8601(3),
          event: event.to_s,
          pid: Process.pid
        }

        extra = payload.is_a?(Hash) ? payload : { message: payload.to_s }
        entry.merge!(extra.compact)
        JSON.generate(entry)
      end
    end
  end
end
-
class PersonIdentityFeedbackService
-
class FeedbackError < StandardError; end
-
-
MAX_LINKED_USERNAMES = 30
-
FEEDBACK_VERSION = "v1".freeze
-
-
def confirm_person!(person:, label: nil, real_person_status: "confirmed_real_person")
  # Records positive user feedback on +person+, optionally relabels them,
  # and refreshes the identity-confidence score. Runs under a row lock to
  # serialize concurrent feedback. Returns the person.
  raise FeedbackError, "Person record is required" unless person&.persisted?

  confirmed_at = Time.current
  person.with_lock do
    metadata = normalize_metadata(person.metadata)
    feedback = normalize_feedback(metadata)
    feedback["real_person_status"] = normalize_real_person_status(real_person_status)
    feedback["last_action"] = "confirm_person"
    feedback["confirmed_count"] = feedback["confirmed_count"].to_i + 1
    feedback["last_action_at"] = confirmed_at.iso8601
    feedback["feedback_version"] = FEEDBACK_VERSION
    metadata["user_feedback"] = feedback

    # Keep a bounded, deduplicated list of usernames tied to this person;
    # the profile's own username only counts for the primary user.
    usernames = Array(metadata["linked_usernames"]).map { |value| normalize_username(value) }.reject(&:blank?).uniq
    own_username = normalize_username(person.instagram_profile&.username)
    usernames << own_username if own_username.present? && person.role.to_s == "primary_user"
    metadata["linked_usernames"] = usernames.first(MAX_LINKED_USERNAMES)

    new_label = label.to_s.strip
    person.label = new_label if new_label.present?
    person.metadata = metadata
    person.save!
    person.sync_identity_confidence!(timestamp: confirmed_at)
    person
  end
end
-
-
def mark_incorrect!(person:, reason: nil)
  # Marks +person+ as a bad identity: disables future matching, clears
  # the canonical embedding, demotes a primary_user role, and propagates
  # the verdict onto every linked face. Returns the person.
  raise FeedbackError, "Person record is required" unless person&.persisted?

  marked_at = Time.current
  person.with_lock do
    metadata = normalize_metadata(person.metadata)
    feedback = normalize_feedback(metadata)
    cleaned_reason = reason.to_s.strip
    feedback["real_person_status"] = "incorrect"
    feedback["last_action"] = "mark_incorrect"
    feedback["last_action_at"] = marked_at.iso8601
    feedback["incorrect_reason"] = cleaned_reason if cleaned_reason.present?
    feedback["feedback_version"] = FEEDBACK_VERSION
    metadata["user_feedback"] = feedback
    metadata["matching_disabled"] = true
    metadata["matching_disabled_reason"] = cleaned_reason.presence || "marked_incorrect"

    updates = {
      role: person.role.to_s == "primary_user" ? "unknown" : person.role,
      metadata: metadata,
      canonical_embedding: nil
    }
    # Also clear the pgvector mirror column when the schema has one.
    updates[:canonical_embedding_vector] = nil if person.respond_to?(:canonical_embedding_vector=)
    person.update!(updates)
    annotate_face_feedback!(person: person, status: "incorrect", reason: reason)
    person.sync_identity_confidence!(timestamp: marked_at)
    person
  end
end
-
-
def link_profile_owner!(person:)
  # Promotes +person+ to the profile's primary user inside a single
  # transaction, demoting any other primary_user people for the same
  # profile. Returns the person.
  raise FeedbackError, "Person record is required" unless person&.persisted?

  profile = person.instagram_profile
  raise FeedbackError, "Profile not found for person" unless profile

  linked_at = Time.current
  InstagramStoryPerson.transaction do
    # There can be only one owner per profile: demote the others first.
    InstagramStoryPerson
      .where(instagram_profile_id: profile.id, role: "primary_user")
      .where.not(id: person.id)
      .update_all(role: "secondary_person", updated_at: linked_at)

    person.with_lock do
      metadata = normalize_metadata(person.metadata)
      feedback = normalize_feedback(metadata)
      feedback["last_action"] = "link_profile_owner"
      feedback["last_action_at"] = linked_at.iso8601
      feedback["real_person_status"] = "confirmed_real_person"
      feedback["owner_link_confirmed"] = true
      feedback["feedback_version"] = FEEDBACK_VERSION

      usernames = Array(metadata["linked_usernames"]).map { |value| normalize_username(value) }.reject(&:blank?).uniq
      owner_username = normalize_username(profile.username)
      usernames << owner_username if owner_username.present?

      metadata["linked_usernames"] = usernames.first(MAX_LINKED_USERNAMES)
      metadata["user_feedback"] = feedback

      person.update!(
        role: "primary_user",
        label: person.label.to_s.presence || profile.username.to_s,
        metadata: metadata
      )
      person.sync_identity_confidence!(timestamp: linked_at)
    end
  end

  person
end
-
-
def merge_people!(source_person:, target_person:)
  # Folds +source_person+ into +target_person+: re-links all faces,
  # merges metadata/embeddings onto the target, then neutralizes the
  # source (role unknown, matching disabled) while keeping it around as
  # an audit record. Returns the target person.
  validate_merge!(source_person: source_person, target_person: target_person)

  merged_at = Time.current
  InstagramStoryPerson.transaction do
    # Lock both rows so concurrent feedback cannot interleave.
    source_person.lock!
    target_person.lock!

    # update_all returns the number of rows re-linked.
    relinked_post_faces = source_person.instagram_post_faces.update_all(
      instagram_story_person_id: target_person.id,
      role: target_person.role.to_s,
      updated_at: merged_at
    )
    relinked_story_faces = source_person.instagram_story_faces.update_all(
      instagram_story_person_id: target_person.id,
      role: target_person.role.to_s,
      updated_at: merged_at
    )

    merged_metadata = merge_person_metadata!(
      target_person: target_person,
      source_person: source_person,
      moved_post_faces: relinked_post_faces,
      moved_story_faces: relinked_story_faces,
      merged_at: merged_at
    )

    target_person.update!(
      appearance_count: recompute_appearance_count(target_person),
      first_seen_at: [ target_person.first_seen_at, source_person.first_seen_at ].compact.min,
      last_seen_at: [ target_person.last_seen_at, source_person.last_seen_at ].compact.max,
      canonical_embedding: merged_embedding(target_person: target_person, source_person: source_person).presence,
      metadata: merged_metadata
    )
    # Mirror the merged embedding into the vector column when present.
    target_person.update_column(:canonical_embedding_vector, target_person.canonical_embedding.presence) if target_person.respond_to?(:canonical_embedding_vector=)
    target_person.sync_identity_confidence!(timestamp: merged_at)

    # Neutralize the source but record where its data went.
    source_metadata = normalize_metadata(source_person.metadata)
    source_feedback = normalize_feedback(source_metadata)
    source_feedback["last_action"] = "merged_into_person"
    source_feedback["last_action_at"] = merged_at.iso8601
    source_feedback["merged_into_person_id"] = target_person.id
    source_feedback["feedback_version"] = FEEDBACK_VERSION
    source_metadata["user_feedback"] = source_feedback
    source_metadata["merged_into_person_id"] = target_person.id
    source_metadata["merged_at"] = merged_at.iso8601
    source_metadata["matching_disabled"] = true
    source_metadata["matching_disabled_reason"] = "merged_into_#{target_person.id}"

    source_person.update!(
      role: "unknown",
      appearance_count: 0,
      canonical_embedding: nil,
      metadata: source_metadata
    )
    source_person.update_column(:canonical_embedding_vector, nil) if source_person.respond_to?(:canonical_embedding_vector=)
    source_person.sync_identity_confidence!(timestamp: merged_at)
  end

  target_person
end
-
-
def separate_face!(person:, face:)
  # Splits +face+ off +person+ into a brand-new secondary person seeded
  # with the face's embedding, then recomputes the original person's
  # derived stats. Returns the newly created person.
  raise FeedbackError, "Person record is required" unless person&.persisted?
  raise FeedbackError, "Face record is required" unless face&.persisted?
  raise FeedbackError, "Face is not linked to this person" unless face.instagram_story_person_id == person.id

  separated_at = Time.current
  embedding = normalize_vector(face.embedding)
  seed_metadata = {
    "source" => "user_feedback_split",
    "separated_from_person_id" => person.id,
    "user_feedback" => {
      "real_person_status" => "unverified",
      "last_action" => "separate_face",
      "last_action_at" => separated_at.iso8601,
      "feedback_version" => FEEDBACK_VERSION
    }
  }

  attributes = {
    instagram_account: person.instagram_account,
    instagram_profile: person.instagram_profile,
    role: "secondary_person",
    first_seen_at: separated_at,
    last_seen_at: separated_at,
    appearance_count: 1,
    canonical_embedding: embedding.presence,
    metadata: seed_metadata
  }
  attributes[:canonical_embedding_vector] = embedding if person.respond_to?(:canonical_embedding_vector=) && embedding.present?
  new_person = InstagramStoryPerson.create!(attributes)

  update_face_feedback_metadata!(face: face, status: "separated", reason: "split_from_person_#{person.id}", timestamp: separated_at)
  face.update!(
    instagram_story_person: new_person,
    role: new_person.role
  )

  recompute_person_after_face_change!(person: person, timestamp: separated_at)
  new_person.sync_identity_confidence!(timestamp: separated_at)
  person.reload
  new_person
end
-
-
private

# Validates that a merge between two people is allowed: both records must be
# persisted, distinct, and belong to the same account and profile.
#
# @raise [FeedbackError] with a user-facing message when any check fails
def validate_merge!(source_person:, target_person:)
  raise FeedbackError, "Source person is required" unless source_person&.persisted?
  raise FeedbackError, "Target person is required" unless target_person&.persisted?
  raise FeedbackError, "Source and target person cannot be the same" if source_person.id == target_person.id

  same_profile = source_person.instagram_profile_id == target_person.instagram_profile_id
  same_account = source_person.instagram_account_id == target_person.instagram_account_id
  return if same_profile && same_account

  raise FeedbackError, "People can only be merged within the same account/profile"
end
-
-
# Stamps the same feedback status/reason onto every face (post and story)
# linked to the given person, using one shared timestamp for the batch.
def annotate_face_feedback!(person:, status:, reason:)
  stamped_at = Time.current
  [ person.instagram_post_faces, person.instagram_story_faces ].each do |face_scope|
    face_scope.find_each do |face|
      update_face_feedback_metadata!(face: face, status: status, reason: reason, timestamp: stamped_at)
    end
  end
end
-
-
# Merges a feedback stamp into a face's metadata["user_feedback"] hash and
# persists it via update_columns (skips validations/callbacks on purpose).
# Best-effort: any error is swallowed so annotation never blocks the caller.
def update_face_feedback_metadata!(face:, status:, reason:, timestamp:)
  metadata = normalize_metadata(face.metadata)
  existing_feedback = metadata["user_feedback"]
  feedback = existing_feedback.is_a?(Hash) ? existing_feedback.deep_dup : {}

  cleaned_reason = reason.to_s.strip
  feedback["status"] = status.to_s
  feedback["reason"] = cleaned_reason if cleaned_reason.present?
  feedback["updated_at"] = timestamp.iso8601
  feedback["version"] = FEEDBACK_VERSION

  metadata["user_feedback"] = feedback
  face.update_columns(metadata: metadata, updated_at: timestamp)
rescue StandardError
  nil
end
-
-
# Builds the surviving (target) person's metadata after a merge: stamps the
# feedback trail, unions linked usernames, and appends an audit row to the
# merge history. Returns the new metadata hash without persisting it.
def merge_person_metadata!(target_person:, source_person:, moved_post_faces:, moved_story_faces:, merged_at:)
  target_metadata = normalize_metadata(target_person.metadata)
  source_metadata = normalize_metadata(source_person.metadata)
  target_feedback = normalize_feedback(target_metadata)
  source_feedback = normalize_feedback(source_metadata)

  # Record the merge action and bump the running merge counter.
  target_feedback["last_action"] = "merge_person"
  target_feedback["last_action_at"] = merged_at.iso8601
  target_feedback["feedback_version"] = FEEDBACK_VERSION
  target_feedback["merge_count"] = target_feedback["merge_count"].to_i + 1
  target_metadata["user_feedback"] = target_feedback

  # Union linked usernames (target's entries first), capped at the limit.
  sanitize_usernames = lambda do |raw_values|
    Array(raw_values).map { |raw| normalize_username(raw) }.reject(&:blank?)
  end
  combined_usernames = sanitize_usernames.call(target_metadata["linked_usernames"]) +
    sanitize_usernames.call(source_metadata["linked_usernames"])
  target_metadata["linked_usernames"] = combined_usernames.uniq.first(MAX_LINKED_USERNAMES)

  # Append an audit row; history is capped at 40 rows, dropping the oldest.
  history_entry = {
    "source_person_id" => source_person.id,
    "source_label" => source_person.label.to_s.presence,
    "source_real_person_status" => source_feedback["real_person_status"].to_s.presence,
    "moved_post_faces" => moved_post_faces.to_i,
    "moved_story_faces" => moved_story_faces.to_i,
    "merged_at" => merged_at.iso8601
  }.compact
  retained_history = Array(target_metadata["merge_history"]).select { |row| row.is_a?(Hash) }.first(40)
  target_metadata["merge_history"] = (retained_history + [ history_entry ]).last(40)

  target_metadata
end
-
-
# Combines two people's canonical embeddings into one unit vector, weighting
# each side by its appearance count (minimum weight 1). If either side has no
# usable embedding, the other is returned as-is.
def merged_embedding(target_person:, source_person:)
  target_vector = normalize_vector(target_person.canonical_embedding)
  source_vector = normalize_vector(source_person.canonical_embedding)
  return target_vector if source_vector.empty?
  return source_vector if target_vector.empty?

  target_weight = [ target_person.appearance_count.to_i, 1 ].max
  source_weight = [ source_person.appearance_count.to_i, 1 ].max
  total_weight = (target_weight + source_weight).to_f

  blended = target_vector.zip(source_vector).map do |target_value, source_value|
    ((target_value * target_weight) + (source_value * source_weight)) / total_weight
  end
  # Re-normalize so the weighted average is again a unit vector.
  normalize_vector(blended)
end
-
-
# Recomputes a person's appearance count after faces were added/removed,
# stamps the feedback trail, clears the canonical embedding when no faces
# remain, and refreshes the identity-confidence score.
def recompute_person_after_face_change!(person:, timestamp:)
  remaining_count = recompute_appearance_count(person)

  metadata = normalize_metadata(person.metadata)
  feedback = normalize_feedback(metadata)
  feedback["last_action"] = "separate_face_applied"
  feedback["last_action_at"] = timestamp.iso8601
  feedback["feedback_version"] = FEEDBACK_VERSION
  metadata["user_feedback"] = feedback

  pending_changes = {
    appearance_count: remaining_count,
    metadata: metadata
  }
  # No faces left: drop the stale embedding (and its vector column if present).
  if remaining_count <= 0
    pending_changes[:canonical_embedding] = nil
    pending_changes[:canonical_embedding_vector] = nil if person.respond_to?(:canonical_embedding_vector=)
  end

  person.update!(pending_changes)
  person.sync_identity_confidence!(timestamp: timestamp)
end
-
-
# Total linked faces (post + story) for the person, floored at zero.
def recompute_appearance_count(person)
  total = person.instagram_post_faces.count + person.instagram_story_faces.count
  [ total, 0 ].max
end
-
-
# Canonicalizes a real-person status token: blank input and the shorthand
# "confirmed" map to "confirmed_real_person", "likely" maps to
# "likely_real_person", anything else passes through stripped.
def normalize_real_person_status(value)
  token = value.to_s.strip
  return "confirmed_real_person" if token.empty? || token == "confirmed"
  return "likely_real_person" if token == "likely"

  token
end
-
-
# Returns a deep copy of a metadata hash so callers can mutate it safely;
# any non-Hash value becomes a fresh empty hash.
def normalize_metadata(value)
  return {} unless value.is_a?(Hash)

  value.deep_dup
end
-
-
# Extracts a safe, mutable copy of metadata["user_feedback"]; anything that
# is not a Hash yields a fresh empty hash.
def normalize_feedback(metadata)
  feedback = metadata["user_feedback"]
  return {} unless feedback.is_a?(Hash)

  feedback.deep_dup
end
-
-
# Normalizes a raw username: lowercases, strips whitespace and a leading "@",
# removes characters outside [a-z0-9._], and rejects results shorter than 2
# or longer than 30 characters (Instagram's handle bounds).
#
# @return [String, nil] normalized handle, or nil when unusable
def normalize_username(value)
  candidate = value.to_s.strip.downcase
  return nil if candidate.empty?

  candidate = candidate.delete_prefix("@").gsub(/[^a-z0-9._]/, "")
  candidate.length.between?(2, 30) ? candidate : nil
end
-
-
# Converts any enumerable (or nil) into a unit-length float vector using the
# L2 norm. Empty or zero-magnitude input yields an empty array.
def normalize_vector(values)
  components = Array(values).map(&:to_f)
  return [] if components.empty?

  magnitude = Math.sqrt(components.sum { |component| component * component })
  return [] if magnitude <= 0.0

  components.map { |component| component / magnitude }
end
-
end
-
# Derives a lightweight personalization persona (tone, interests, emoji
# intensity, engagement style) from a profile's stored behavioral summary.
class PersonalizationEngine
  # Baseline persona used when behavioral data is missing or inference raises.
  DEFAULT_PROFILE = {
    tone: "friendly",
    interests: [],
    emoji_style: "moderate",
    engagement_style: "supportive"
  }.freeze

  # Builds the persona hash for a profile.
  #
  # @param profile [Object] responds to #instagram_profile_behavior_profile,
  #   whose #behavioral_summary is expected to be a Hash — TODO confirm schema
  # @return [Hash] keys :tone, :interests, :emoji_style, :engagement_style
  def build(profile:)
    behavior_record = profile.instagram_profile_behavior_profile
    summary =
      if behavior_record&.behavioral_summary.is_a?(Hash)
        behavior_record.behavioral_summary
      else
        {}
      end

    {
      tone: infer_tone(summary),
      interests: summary.fetch("content_categories", {}).to_h.keys.first(8),
      emoji_style: infer_emoji_style(summary),
      engagement_style: infer_engagement_style(summary)
    }
  rescue StandardError
    # Any unexpected shape falls back to the safe default persona.
    DEFAULT_PROFILE
  end

  private

  # Maps the dominant sentiment bucket onto a tone label.
  def infer_tone(summary)
    dominant = summary.fetch("sentiment_trend", {}).to_h.max_by { |_key, value| value.to_i }&.first.to_s
    case dominant
    when "positive" then "optimistic"
    when "neutral" then "calm"
    when "negative" then "empathetic"
    else "friendly"
    end
  end

  # Buckets total hashtag volume into an emoji-usage intensity.
  def infer_emoji_style(summary)
    total_tags = summary.fetch("top_hashtags", {}).to_h.values.sum(&:to_i)
    if total_tags < 5
      "light"
    elsif total_tags < 25
      "moderate"
    else
      "expressive"
    end
  end

  # Profiles with several recurring secondary people read as community-oriented.
  def infer_engagement_style(summary)
    summary.fetch("frequent_secondary_persons", []).size >= 3 ? "community" : "supportive"
  end
end
-
module Pipeline
  # Per-account scheduler for the continuous-processing loop. On each run it
  # checks which recurring tasks (story sync, feed engagement, profile scan)
  # are due, enqueues the matching ActiveJobs when the local AI backend is
  # healthy, refreshes the workspace action queue, and persists the next-run
  # timestamps plus a heartbeat on the account. Returns a stats hash of what
  # was enqueued/skipped.
  class AccountProcessingCoordinator
    # Cadence for each recurring task; actual next-run times get up to ~12%
    # positive jitter added (see #jitter).
    STORY_SYNC_INTERVAL = 90.minutes
    FEED_SYNC_INTERVAL = 2.hours
    PROFILE_SCAN_INTERVAL = 75.minutes
    # Shorter retry cadence used when the profile scan is replaced by the
    # lightweight refresh fallback (local AI unhealthy).
    FALLBACK_PROFILE_REFRESH_INTERVAL = 45.minutes

    # @param account [InstagramAccount] account being coordinated
    # @param trigger_source [#to_s] label recorded in logs/stats; blank
    #   values become "unspecified"
    # @param now [Time] injectable clock used for due-checks and scheduling
    def initialize(account:, trigger_source:, now: Time.current)
      @account = account
      @trigger_source = trigger_source.to_s.presence || "unspecified"
      @now = now
    end

    # Executes one coordination pass. Mutates and saves @account's
    # continuous_processing_* columns; returns the stats hash.
    def run!
      stats = {
        trigger_source: @trigger_source,
        started_at: @now.iso8601(3),
        enqueued_jobs: [],
        skipped_jobs: []
      }

      # One health probe shared by all three due-checks below.
      health = Ops::LocalAiHealth.check
      stats[:local_ai_health] = health

      if due_for_story_sync?
        if health_ok?(health)
          enqueue_story_sync!(stats)
          @account.continuous_processing_next_story_sync_at = next_time(STORY_SYNC_INTERVAL)
        else
          # Skipped entirely when unhealthy; next-run timestamp is left as-is
          # so the task stays due and retries on the next pass.
          stats[:skipped_jobs] << { job: "SyncHomeStoryCarouselJob", reason: "local_ai_unhealthy" }
        end
      end

      if due_for_feed_sync?
        if health_ok?(health)
          enqueue_feed_engagement!(stats)
          @account.continuous_processing_next_feed_sync_at = next_time(FEED_SYNC_INTERVAL)
        else
          stats[:skipped_jobs] << { job: "AutoEngageHomeFeedJob", reason: "local_ai_unhealthy" }
        end
      end

      if due_for_profile_scan?
        if health_ok?(health)
          enqueue_profile_scan!(stats)
          @account.continuous_processing_next_profile_scan_at = next_time(PROFILE_SCAN_INTERVAL)
        else
          # Unlike the other two tasks, the profile scan has a degraded-mode
          # substitute that does not need the local AI backend.
          enqueue_profile_refresh_fallback!(stats)
          @account.continuous_processing_next_profile_scan_at = next_time(FALLBACK_PROFILE_REFRESH_INTERVAL)
        end
      end

      # Always refresh the workspace action queue (has its own rescue).
      enqueue_workspace_actions!(stats)

      # Persist the in-memory next-run values assigned above together with a
      # fresh heartbeat; unchanged attributes are written back unchanged.
      @account.update!(
        continuous_processing_last_heartbeat_at: Time.current,
        continuous_processing_next_story_sync_at: @account.continuous_processing_next_story_sync_at,
        continuous_processing_next_feed_sync_at: @account.continuous_processing_next_feed_sync_at,
        continuous_processing_next_profile_scan_at: @account.continuous_processing_next_profile_scan_at
      )

      stats[:finished_at] = Time.current.iso8601(3)
      stats
    end

    private

    def due_for_story_sync?
      due?(@account.continuous_processing_next_story_sync_at)
    end

    def due_for_feed_sync?
      due?(@account.continuous_processing_next_feed_sync_at)
    end

    def due_for_profile_scan?
      due?(@account.continuous_processing_next_profile_scan_at)
    end

    # A task is due when it has never been scheduled or its time has passed.
    def due?(timestamp)
      timestamp.blank? || timestamp <= @now
    end

    # Reads the :ok flag out of the health payload; any non-Hash payload is
    # treated as unhealthy.
    def health_ok?(health)
      ActiveModel::Type::Boolean.new.cast(health.is_a?(Hash) ? health[:ok] : false)
    end

    # Enqueues the story-carousel sync job and records it in stats + logs.
    def enqueue_story_sync!(stats)
      job = SyncHomeStoryCarouselJob.perform_later(
        instagram_account_id: @account.id,
        story_limit: SyncHomeStoryCarouselJob::STORY_BATCH_LIMIT,
        auto_reply_only: false
      )

      stats[:enqueued_jobs] << {
        job: "SyncHomeStoryCarouselJob",
        active_job_id: job.job_id,
        queue: job.queue_name,
        story_limit: SyncHomeStoryCarouselJob::STORY_BATCH_LIMIT
      }

      Ops::StructuredLogger.info(
        event: "continuous_processing.story_sync_enqueued",
        payload: {
          account_id: @account.id,
          active_job_id: job.job_id,
          trigger_source: @trigger_source
        }
      )
    end

    # Enqueues the home-feed auto-engagement job (posts only, no stories).
    def enqueue_feed_engagement!(stats)
      job = AutoEngageHomeFeedJob.perform_later(
        instagram_account_id: @account.id,
        max_posts: 2,
        include_story: false,
        story_hold_seconds: 18
      )

      stats[:enqueued_jobs] << {
        job: "AutoEngageHomeFeedJob",
        active_job_id: job.job_id,
        queue: job.queue_name,
        max_posts: 2
      }

      Ops::StructuredLogger.info(
        event: "continuous_processing.feed_engagement_enqueued",
        payload: {
          account_id: @account.id,
          active_job_id: job.job_id,
          trigger_source: @trigger_source
        }
      )
    end

    # Enqueues the recent-profile-post scan fan-out job.
    def enqueue_profile_scan!(stats)
      job = EnqueueRecentProfilePostScansForAccountJob.perform_later(
        instagram_account_id: @account.id,
        limit_per_account: 6,
        posts_limit: 3,
        comments_limit: 8
      )

      stats[:enqueued_jobs] << {
        job: "EnqueueRecentProfilePostScansForAccountJob",
        active_job_id: job.job_id,
        queue: job.queue_name,
        limit_per_account: 6,
        posts_limit: 3,
        comments_limit: 8
      }

      Ops::StructuredLogger.info(
        event: "continuous_processing.profile_scan_enqueued",
        payload: {
          account_id: @account.id,
          active_job_id: job.job_id,
          trigger_source: @trigger_source
        }
      )
    end

    # Degraded-mode replacement for the profile scan when local AI is down;
    # logged at warn level so the fallback is visible in monitoring.
    def enqueue_profile_refresh_fallback!(stats)
      job = SyncNextProfilesForAccountJob.perform_later(
        instagram_account_id: @account.id,
        limit: 10
      )

      stats[:enqueued_jobs] << {
        job: "SyncNextProfilesForAccountJob",
        active_job_id: job.job_id,
        queue: job.queue_name,
        limit: 10,
        fallback_reason: "local_ai_unhealthy"
      }

      Ops::StructuredLogger.warn(
        event: "continuous_processing.profile_refresh_fallback_enqueued",
        payload: {
          account_id: @account.id,
          active_job_id: job.job_id,
          trigger_source: @trigger_source
        }
      )
    end

    # Refreshes the workspace to-do queue. Failures are caught, recorded in
    # stats[:skipped_jobs], and logged — they never abort the run.
    def enqueue_workspace_actions!(stats)
      result = Workspace::ActionsTodoQueueService.new(
        account: @account,
        limit: 40,
        enqueue_processing: true
      ).fetch!
      queue_stats = result[:stats].is_a?(Hash) ? result[:stats] : {}

      stats[:enqueued_jobs] << {
        job: "Workspace::ActionsTodoQueueService",
        source: "continuous_processing",
        queued_now: queue_stats[:enqueued_now].to_i,
        ready_items: queue_stats[:ready_items].to_i,
        processing_items: queue_stats[:processing_items].to_i,
        total_items: queue_stats[:total_items].to_i
      }

      Ops::StructuredLogger.info(
        event: "continuous_processing.workspace_actions_refreshed",
        payload: {
          account_id: @account.id,
          trigger_source: @trigger_source,
          queued_now: queue_stats[:enqueued_now].to_i,
          ready_items: queue_stats[:ready_items].to_i,
          processing_items: queue_stats[:processing_items].to_i,
          total_items: queue_stats[:total_items].to_i
        }
      )
    rescue StandardError => e
      stats[:skipped_jobs] << {
        job: "Workspace::ActionsTodoQueueService",
        reason: "workspace_queue_refresh_failed",
        error_class: e.class.name
      }

      Ops::StructuredLogger.warn(
        event: "continuous_processing.workspace_actions_refresh_failed",
        payload: {
          account_id: @account.id,
          trigger_source: @trigger_source,
          error_class: e.class.name,
          # Truncate to keep log payloads bounded.
          error_message: e.message.to_s.byteslice(0, 280)
        }
      )
    end

    # Next scheduled run: base clock plus jittered interval.
    def next_time(interval)
      @now + jitter(interval)
    end

    # Interval in seconds plus a random 0..12% spread, so accounts sharing a
    # cadence do not all fire at the same instant.
    # NOTE(review): the local variable shadows the method name — works, but
    # worth renaming for clarity.
    def jitter(interval)
      seconds = interval.to_i
      jitter = (seconds * 0.12).to_i
      seconds + rand(0..jitter)
    end
  end
end
-
# Runs the face-recognition pipeline for a single Instagram post: loads a
# detectable image (or a video preview frame), detects faces, embeds each
# sufficiently-confident face, matches embeddings to known people, persists
# per-face rows, and merges a summary into post.metadata["face_recognition"].
# All persistence helpers are best-effort (errors swallowed) so a metadata
# write never fails the pipeline.
class PostFaceRecognitionService
  # Minimum detector confidence for a face to be embedded/matched;
  # overridable via environment variable.
  DEFAULT_MATCH_MIN_CONFIDENCE = ENV.fetch("POST_FACE_MATCH_MIN_CONFIDENCE", "0.78").to_f

  # All collaborators are injectable for testing. A nil/negative/invalid
  # match_min_confidence falls back to DEFAULT_MATCH_MIN_CONFIDENCE.
  def initialize(
    face_detection_service: FaceDetectionService.new,
    face_embedding_service: FaceEmbeddingService.new,
    vector_matching_service: VectorMatchingService.new,
    face_identity_resolution_service: FaceIdentityResolutionService.new,
    match_min_confidence: nil
  )
    @face_detection_service = face_detection_service
    @face_embedding_service = face_embedding_service
    @vector_matching_service = vector_matching_service
    @face_identity_resolution_service = face_identity_resolution_service
    @match_min_confidence = begin
      value = match_min_confidence.nil? ? DEFAULT_MATCH_MIN_CONFIDENCE : match_min_confidence.to_f
      value.negative? ? DEFAULT_MATCH_MIN_CONFIDENCE : value
    rescue StandardError
      DEFAULT_MATCH_MIN_CONFIDENCE
    end
  end

  # Processes one post end-to-end.
  #
  # @param post [InstagramPost] must have attached media
  # @return [Hash] either { skipped: true, reason: ... } or a summary with
  #   face counts, matched people, and the identity-resolution result
  def process!(post:)
    return { skipped: true, reason: "post_missing" } unless post
    return { skipped: true, reason: "media_missing" } unless post.media.attached?

    # Step 1: resolve image bytes (direct image, stored preview, or a
    # generated video preview frame).
    source_payload = load_face_detection_payload(post: post)
    if source_payload[:skipped]
      persist_face_recognition_metadata!(
        post: post,
        attributes: {
          "face_count" => post.instagram_post_faces.count,
          "matched_people" => [],
          "detection_source" => source_payload[:detection_source].to_s.presence || source_payload[:content_type].to_s.presence || "unknown",
          "detection_reason" => source_payload[:reason].to_s.presence || "face_detection_skipped",
          "detection_error" => source_payload[:error].to_s.presence,
          "updated_at" => Time.current.iso8601
        }.compact
      )
      return source_payload
    end

    # Step 2: run face detection (also yields OCR text and content signals).
    image_bytes = source_payload[:image_bytes]
    detection = @face_detection_service.detect(
      media_payload: {
        story_id: "post:#{post.id}",
        image_bytes: image_bytes
      }
    )
    detection_metadata = detection[:metadata].is_a?(Hash) ? detection[:metadata] : {}
    # Detector metadata may use symbol or string keys; check both.
    detection_reason = detection_metadata[:reason].to_s.presence || detection_metadata["reason"].to_s.presence
    detection_error = detection_metadata[:error_message].to_s.presence || detection_metadata["error_message"].to_s.presence

    # A populated reason means detection did not complete normally.
    if detection_reason.present?
      persist_face_recognition_metadata!(
        post: post,
        attributes: {
          "face_count" => post.instagram_post_faces.count,
          "matched_people" => [],
          "detection_source" => source_payload[:detection_source],
          "detection_reason" => detection_reason,
          "detection_error" => detection_error,
          "detection_warnings" => Array(detection_metadata[:warnings] || detection_metadata["warnings"]).first(20),
          "updated_at" => Time.current.iso8601
        }.compact
      )
      return {
        skipped: true,
        reason: "face_detection_failed",
        detection_reason: detection_reason,
        detection_error: detection_error
      }
    end

    # Step 3: replace all previously stored face rows for this post.
    post.instagram_post_faces.delete_all
    matches = []
    linked_face_count = 0
    low_confidence_filtered_count = 0

    Array(detection[:faces]).each_with_index do |face, index|
      observation_signature = face_observation_signature(
        post: post,
        face: face,
        index: index,
        detection_source: source_payload[:detection_source]
      )
      confidence = face[:confidence].to_f

      # Low-confidence faces are stored unlinked and never embedded/matched.
      unless linkable_face_confidence?(confidence)
        low_confidence_filtered_count += 1
        persist_unlinked_face!(
          post: post,
          face: face,
          observation_signature: observation_signature,
          source: source_payload[:detection_source],
          reason: "low_confidence"
        )
        next
      end

      # Step 4: embed the face; without a vector it stays unlinked too.
      embedding_payload = @face_embedding_service.embed(
        media_payload: {
          story_id: "post:#{post.id}",
          media_type: "image",
          image_bytes: image_bytes
        },
        face: face
      )
      vector = Array(embedding_payload[:vector]).map(&:to_f)
      if vector.empty?
        persist_unlinked_face!(
          post: post,
          face: face,
          observation_signature: observation_signature,
          source: source_payload[:detection_source],
          reason: "embedding_unavailable"
        )
        next
      end

      # Step 5: match the embedding to an existing person or create one.
      match = @vector_matching_service.match_or_create!(
        account: post.instagram_account,
        profile: post.instagram_profile,
        embedding: vector,
        occurred_at: post.taken_at || Time.current,
        observation_signature: observation_signature
      )

      person = match[:person]
      update_person_face_attributes!(person: person, face: face)
      post.instagram_post_faces.create!(
        instagram_story_person: person,
        role: match[:role].to_s.presence || "unknown",
        detector_confidence: confidence,
        match_similarity: match[:similarity],
        embedding_version: embedding_payload[:version].to_s,
        embedding: vector,
        bounding_box: face[:bounding_box],
        metadata: face_record_metadata(
          source: source_payload[:detection_source],
          face: face,
          observation_signature: observation_signature,
          link_status: "matched"
        )
      )
      linked_face_count += 1

      matches << {
        person_id: person.id,
        role: match[:role],
        label: person.label,
        similarity: match[:similarity],
        owner_match: match[:role].to_s == "primary_user",
        recurring_face: person.appearance_count.to_i > 1,
        appearances: person.appearance_count.to_i,
        real_person_status: person.real_person_status,
        identity_confidence: person.identity_confidence
      }.compact
    end

    # Step 6: write the aggregate summary onto the post.
    total_detected_faces = Array(detection[:faces]).length
    persist_face_recognition_metadata!(
      post: post,
      attributes: {
        "face_count" => total_detected_faces,
        "linked_face_count" => linked_face_count,
        "unlinked_face_count" => [ total_detected_faces - linked_face_count, 0 ].max,
        "low_confidence_filtered_count" => low_confidence_filtered_count,
        "min_match_confidence" => @match_min_confidence.round(3),
        "matched_people" => matches,
        "detection_source" => source_payload[:detection_source],
        "ocr_text" => detection[:ocr_text].to_s,
        "objects" => Array(detection[:content_signals]),
        "hashtags" => Array(detection[:hashtags]),
        "mentions" => Array(detection[:mentions]),
        "profile_handles" => Array(detection[:profile_handles]),
        "detection_warnings" => Array(detection_metadata[:warnings] || detection_metadata["warnings"]).first(20),
        "updated_at" => Time.current.iso8601
      }.compact
    )

    # Step 7: resolve identities using mentions, handles, and @-handles
    # scraped out of the OCR text.
    identity_resolution = @face_identity_resolution_service.resolve_for_post!(
      post: post,
      extracted_usernames: (
        Array(detection[:mentions]) +
        Array(detection[:profile_handles]) +
        detection[:ocr_text].to_s.scan(/@[a-zA-Z0-9._]{2,30}/)
      ),
      content_summary: detection
    )

    if identity_resolution.is_a?(Hash) && identity_resolution[:summary].is_a?(Hash)
      persist_face_recognition_metadata!(
        post: post,
        attributes: {
          "identity" => identity_resolution[:summary],
          "participant_summary" => identity_resolution[:summary][:participant_summary_text].to_s
        }
      )
    end

    {
      skipped: false,
      face_count: total_detected_faces,
      linked_face_count: linked_face_count,
      low_confidence_filtered_count: low_confidence_filtered_count,
      matched_people: matches,
      identity_resolution: identity_resolution
    }
  rescue StandardError => e
    # Record the failure on the post (when possible) and return a skip result
    # instead of raising.
    if post&.persisted?
      persist_face_recognition_metadata!(
        post: post,
        attributes: {
          "face_count" => post.instagram_post_faces.count,
          "matched_people" => [],
          "detection_source" => "post_face_recognition",
          "detection_reason" => "recognition_error",
          "detection_error" => e.message.to_s,
          "updated_at" => Time.current.iso8601
        }
      )
    end

    {
      skipped: true,
      reason: "recognition_error",
      error: e.message.to_s
    }
  end

  private

  # Merges the given attributes into post.metadata["face_recognition"] under
  # a row lock (reloading first to avoid clobbering concurrent writers).
  # Best-effort: any failure is swallowed.
  def persist_face_recognition_metadata!(post:, attributes:)
    post.with_lock do
      post.reload
      metadata = post.metadata.is_a?(Hash) ? post.metadata.deep_dup : {}
      current = metadata["face_recognition"].is_a?(Hash) ? metadata["face_recognition"].deep_dup : {}
      metadata["face_recognition"] = current.merge(attributes.to_h.compact)
      post.update!(metadata: metadata)
    end
  rescue StandardError
    nil
  end

  # Chooses the image bytes to run detection on, by media content type:
  # images are used directly; videos use the stored preview image or, as a
  # last resort, a freshly generated 960px preview frame. Returns a payload
  # hash with :skipped, :image_bytes, :detection_source, :content_type.
  def load_face_detection_payload(post:)
    content_type = post.media.blob&.content_type.to_s
    if content_type.start_with?("image/")
      return {
        skipped: false,
        image_bytes: post.media.download,
        detection_source: "post_media_image",
        content_type: content_type
      }
    end

    if content_type.start_with?("video/")
      if post.preview_image.attached?
        return {
          skipped: false,
          image_bytes: post.preview_image.download,
          detection_source: "post_preview_image",
          content_type: post.preview_image.blob&.content_type.to_s
        }
      end

      begin
        # Requires an ActiveStorage video previewer (ffmpeg) to be available.
        generated_preview = post.media.preview(resize_to_limit: [ 960, 960 ]).processed
        preview_blob = generated_preview.respond_to?(:image) ? generated_preview.image : nil
        return {
          skipped: false,
          image_bytes: generated_preview.download,
          detection_source: "post_generated_video_preview",
          content_type: preview_blob&.content_type.to_s.presence || "image/jpeg"
        }
      rescue StandardError
        return {
          skipped: true,
          reason: "video_preview_unavailable",
          content_type: content_type
        }
      end
    end

    # Neither image nor video (e.g. audio or unknown blob type).
    {
      skipped: true,
      reason: "unsupported_content_type",
      content_type: content_type
    }
  rescue StandardError => e
    {
      skipped: true,
      reason: "media_load_error",
      error: e.message.to_s,
      content_type: content_type.to_s
    }
  end

  # Builds a stable string signature for one detected face occurrence
  # (post id + source + index + bounding box corners), used for matching
  # dedupe downstream.
  # NOTE(review): bounding_box is read with string keys here but symbol keys
  # elsewhere (face[:bounding_box]) — confirm the detector's key type.
  def face_observation_signature(post:, face:, index:, detection_source:)
    bbox = face[:bounding_box].is_a?(Hash) ? face[:bounding_box] : {}
    [
      "post",
      post.id,
      detection_source.to_s,
      index.to_i,
      bbox["x1"],
      bbox["y1"],
      bbox["x2"],
      bbox["y2"]
    ].map(&:to_s).join(":")
  end

  # True when the detector confidence clears the configured threshold.
  def linkable_face_confidence?(confidence)
    confidence.to_f >= @match_min_confidence
  end

  # Stores a face row with no person link (low confidence or no embedding),
  # recording why it was skipped. Best-effort: errors are swallowed.
  def persist_unlinked_face!(post:, face:, observation_signature:, source:, reason:)
    post.instagram_post_faces.create!(
      instagram_story_person: nil,
      role: "unknown",
      detector_confidence: face[:confidence].to_f,
      match_similarity: nil,
      embedding_version: nil,
      embedding: nil,
      bounding_box: face[:bounding_box],
      metadata: face_record_metadata(
        source: source,
        face: face,
        observation_signature: observation_signature,
        link_status: "unlinked",
        link_skip_reason: reason
      )
    )
  rescue StandardError
    nil
  end

  # Metadata hash stored on each face row; nil values are dropped.
  def face_record_metadata(source:, face:, observation_signature:, link_status:, link_skip_reason: nil)
    {
      source: source,
      landmarks: face[:landmarks],
      likelihoods: face[:likelihoods],
      age: face[:age],
      age_range: face[:age_range],
      gender: face[:gender],
      gender_score: face[:gender_score].to_f,
      observation_signature: observation_signature,
      link_status: link_status,
      link_skip_reason: link_skip_reason
    }.compact
  end

  # Accumulates per-observation demographic cues (gender tally, age-range
  # tally, rolling age samples capped at 20) into the person's
  # metadata["face_attributes"]. Uses update_columns, so validations and
  # callbacks are bypassed. Best-effort: errors are swallowed.
  def update_person_face_attributes!(person:, face:)
    return unless person

    metadata = person.metadata.is_a?(Hash) ? person.metadata.deep_dup : {}
    attrs = metadata["face_attributes"].is_a?(Hash) ? metadata["face_attributes"].deep_dup : {}

    gender = face[:gender].to_s.strip.downcase
    if gender.present?
      gender_counts = attrs["gender_counts"].is_a?(Hash) ? attrs["gender_counts"].deep_dup : {}
      gender_counts[gender] = gender_counts[gender].to_i + 1
      attrs["gender_counts"] = gender_counts
      # Most frequently observed gender wins.
      attrs["primary_gender_cue"] = gender_counts.max_by { |_key, count| count.to_i }&.first
    end

    age_range = face[:age_range].to_s.strip
    if age_range.present?
      age_counts = attrs["age_range_counts"].is_a?(Hash) ? attrs["age_range_counts"].deep_dup : {}
      age_counts[age_range] = age_counts[age_range].to_i + 1
      attrs["age_range_counts"] = age_counts
      attrs["primary_age_range"] = age_counts.max_by { |_key, count| count.to_i }&.first
    end

    age_value = face[:age].to_f
    if age_value.positive?
      # Keep at most 19 prior samples so the list caps at 20 after append.
      samples = Array(attrs["age_samples"]).map(&:to_f).first(19)
      samples << age_value.round(1)
      attrs["age_samples"] = samples
      attrs["age_estimate"] = (samples.sum / samples.length.to_f).round(1)
    end

    attrs["last_observed_at"] = Time.current.iso8601
    metadata["face_attributes"] = attrs
    person.update_columns(metadata: metadata, updated_at: Time.current)
  rescue StandardError
    nil
  end
end
-
class PostVideoContextExtractionService
-
MAX_VIDEO_BYTES = ENV.fetch("POST_VIDEO_CONTEXT_MAX_BYTES", 35 * 1024 * 1024).to_i
-
MAX_DYNAMIC_INTELLIGENCE_BYTES = ENV.fetch("POST_VIDEO_DYNAMIC_INTELLIGENCE_MAX_BYTES", 20 * 1024 * 1024).to_i
-
MAX_AUDIO_EXTRACTION_BYTES = ENV.fetch("POST_VIDEO_AUDIO_MAX_BYTES", 30 * 1024 * 1024).to_i
-
MAX_AUDIO_DURATION_SECONDS = ENV.fetch("POST_VIDEO_AUDIO_MAX_DURATION_SECONDS", 180).to_i
-
TRANSCRIPT_MAX_CHARS = ENV.fetch("POST_VIDEO_TRANSCRIPT_MAX_CHARS", 420).to_i
-
TOPIC_LIMIT = ENV.fetch("POST_VIDEO_TOPIC_LIMIT", 30).to_i
-
SIGNAL_LIMIT = ENV.fetch("POST_VIDEO_SIGNAL_LIMIT", 40).to_i
-
-
def initialize(
-
video_frame_change_detector_service: VideoFrameChangeDetectorService.new,
-
video_metadata_service: VideoMetadataService.new,
-
video_audio_extraction_service: VideoAudioExtractionService.new,
-
speech_transcription_service: SpeechTranscriptionService.new,
-
local_microservice_client: Ai::LocalMicroserviceClient.new,
-
content_understanding_service: StoryContentUnderstandingService.new
-
)
-
@video_frame_change_detector_service = video_frame_change_detector_service
-
@video_metadata_service = video_metadata_service
-
@video_audio_extraction_service = video_audio_extraction_service
-
@speech_transcription_service = speech_transcription_service
-
@local_microservice_client = local_microservice_client
-
@content_understanding_service = content_understanding_service
-
end
-
-
def extract(video_bytes:, reference_id:, content_type:)
-
bytes = video_bytes.to_s.b
-
return skipped_result(reason: "video_bytes_missing") if bytes.blank?
-
if bytes.bytesize > MAX_VIDEO_BYTES
-
return skipped_result(
-
reason: "video_too_large_for_context_extraction",
-
byte_size: bytes.bytesize,
-
max_bytes: MAX_VIDEO_BYTES
-
)
-
end
-
-
mode = @video_frame_change_detector_service.classify(
-
video_bytes: bytes,
-
reference_id: reference_id.to_s,
-
content_type: content_type
-
)
-
processing_mode = mode[:processing_mode].to_s.presence || "dynamic_video"
-
static_video = processing_mode == "static_image"
-
semantic_route = static_video ? "image" : "video"
-
-
probe = build_probe(
-
bytes: bytes,
-
reference_id: reference_id,
-
content_type: content_type,
-
mode: mode
-
)
-
duration_seconds = probe[:duration_seconds]
-
probe_metadata = probe[:metadata].is_a?(Hash) ? probe[:metadata] : {}
-
has_audio = ActiveModel::Type::Boolean.new.cast(probe_metadata["has_audio"] || probe_metadata[:has_audio])
-
-
audio = extract_audio_if_allowed(
-
bytes: bytes,
-
reference_id: reference_id,
-
content_type: content_type,
-
duration_seconds: duration_seconds,
-
has_audio: has_audio
-
)
-
transcript = transcribe_audio_if_available(audio: audio, reference_id: reference_id)
-
transcript_text = truncate_text(transcript[:transcript].to_s, max: TRANSCRIPT_MAX_CHARS)
-
-
local_video_intelligence = extract_local_video_intelligence_if_allowed(
-
bytes: bytes,
-
reference_id: reference_id,
-
static_video: static_video
-
)
-
static_frame_intelligence = extract_static_frame_intelligence_if_available(
-
mode: mode,
-
reference_id: reference_id,
-
static_video: static_video
-
)
-
-
detections =
-
detections_from_static_frame_intelligence(static_frame_intelligence: static_frame_intelligence) +
-
detections_from_local_intelligence(local_video_intelligence: local_video_intelligence)
-
understanding = @content_understanding_service.build(
-
media_type: semantic_route,
-
detections: detections,
-
transcript_text: transcript_text
-
)
-
-
topics = normalize_string_array(understanding[:topics], limit: TOPIC_LIMIT)
-
objects = normalize_string_array(understanding[:objects], limit: SIGNAL_LIMIT)
-
hashtags = normalize_string_array(understanding[:hashtags], limit: SIGNAL_LIMIT)
-
mentions = normalize_string_array(understanding[:mentions], limit: SIGNAL_LIMIT)
-
profile_handles = normalize_string_array(understanding[:profile_handles], limit: SIGNAL_LIMIT)
-
-
{
-
skipped: false,
-
processing_mode: processing_mode,
-
static: ActiveModel::Type::Boolean.new.cast(mode[:static]) || static_video,
-
semantic_route: semantic_route,
-
duration_seconds: duration_seconds,
-
has_audio: has_audio,
-
transcript: transcript_text.presence,
-
topics: topics,
-
objects: objects,
-
scenes: normalize_hash_array(understanding[:scenes], limit: SIGNAL_LIMIT),
-
hashtags: hashtags,
-
mentions: mentions,
-
profile_handles: profile_handles,
-
ocr_text: understanding[:ocr_text].to_s.presence,
-
ocr_blocks: normalize_hash_array(understanding[:ocr_blocks], limit: SIGNAL_LIMIT),
-
context_summary: context_summary(
-
processing_mode: processing_mode,
-
duration_seconds: duration_seconds,
-
topics: topics,
-
transcript: transcript_text
-
),
-
metadata: {
-
frame_change_detection: mode[:metadata].is_a?(Hash) ? mode[:metadata] : {},
-
video_probe: probe_metadata,
-
audio_extraction: audio[:metadata],
-
transcription: transcript[:metadata],
-
static_frame_intelligence: static_frame_intelligence[:metadata],
-
local_video_intelligence: local_video_intelligence[:metadata]
-
}
-
}
-
rescue StandardError => e
-
skipped_result(
-
reason: "video_context_extraction_error",
-
error_class: e.class.name,
-
error_message: e.message.to_s
-
)
-
end
-
-
private
-
-
def build_probe(bytes:, reference_id:, content_type:, mode:)
-
probe_metadata = mode.dig(:metadata, :video_probe)
-
probe_duration = mode[:duration_seconds]
-
-
if probe_metadata.is_a?(Hash) && (probe_duration.to_f.positive? || probe_metadata.present?)
-
return {
-
duration_seconds: probe_duration,
-
metadata: probe_metadata
-
}
-
end
-
-
@video_metadata_service.probe(
-
video_bytes: bytes,
-
story_id: reference_id.to_s,
-
content_type: content_type
-
)
-
rescue StandardError => e
-
{
-
duration_seconds: nil,
-
metadata: {
-
reason: "video_probe_failed",
-
error_class: e.class.name,
-
error_message: e.message.to_s
-
}
-
}
-
end
-
-
def extract_audio_if_allowed(bytes:, reference_id:, content_type:, duration_seconds:, has_audio:)
-
return empty_audio(reason: "no_audio_stream") unless has_audio
-
if bytes.bytesize > MAX_AUDIO_EXTRACTION_BYTES
-
return empty_audio(reason: "video_too_large_for_audio_extraction")
-
end
-
if duration_seconds.to_f.positive? && duration_seconds.to_f > MAX_AUDIO_DURATION_SECONDS
-
return empty_audio(reason: "video_too_long_for_audio_extraction")
-
end
-
-
@video_audio_extraction_service.extract(
-
video_bytes: bytes,
-
story_id: reference_id.to_s,
-
content_type: content_type
-
)
-
rescue StandardError => e
-
empty_audio(reason: "audio_extraction_error", error_class: e.class.name, error_message: e.message.to_s)
-
end
-
-
def transcribe_audio_if_available(audio:, reference_id:)
-
audio_bytes = audio[:audio_bytes].to_s.b
-
return empty_transcript(reason: "audio_unavailable") if audio_bytes.blank?
-
-
@speech_transcription_service.transcribe(
-
audio_bytes: audio_bytes,
-
story_id: reference_id.to_s
-
)
-
rescue StandardError => e
-
empty_transcript(reason: "transcription_error", error_class: e.class.name, error_message: e.message.to_s)
-
end
-
-
# Calls the local AI microservice for whole-video intelligence (labels,
# scenes, OCR, ...). Static videos are skipped (they are routed through the
# image path) as are oversized payloads; any failure degrades to an empty
# data hash with a reason in metadata.
def extract_local_video_intelligence_if_allowed(bytes:, reference_id:, static_video:)
  return { data: {}, metadata: { reason: "static_video_routed_to_image" } } if static_video
  if bytes.bytesize > MAX_DYNAMIC_INTELLIGENCE_BYTES
    return { data: {}, metadata: { reason: "video_too_large_for_dynamic_intelligence" } }
  end

  response = @local_microservice_client.analyze_video_story_intelligence!(
    video_bytes: bytes,
    usage_context: {
      workflow: "post_analysis_pipeline",
      task: "video_context",
      reference_id: reference_id.to_s
    }
  )
  payload = response.is_a?(Hash) ? response : {}
  meta = payload["metadata"]
  { data: payload, metadata: meta.is_a?(Hash) ? meta : {} }
rescue StandardError => e
  {
    data: {},
    metadata: {
      reason: "dynamic_intelligence_error",
      error_class: e.class.name,
      error_message: e.message.to_s
    }
  }
end
-
-
# Runs face/OCR detection against the representative frame of a static
# video. Dynamic videos and missing frames short-circuit with a reason;
# microservice failures degrade to an empty data hash.
def extract_static_frame_intelligence_if_available(mode:, reference_id:, static_video:)
  return { data: {}, metadata: { reason: "dynamic_video_no_static_frame_analysis" } } unless static_video

  frame = mode[:frame_bytes].to_s.b
  return { data: {}, metadata: { reason: "static_frame_missing" } } if frame.blank?

  response = @local_microservice_client.detect_faces_and_ocr!(
    image_bytes: frame,
    usage_context: {
      workflow: "post_analysis_pipeline",
      task: "video_static_frame_context",
      reference_id: reference_id.to_s
    }
  )
  payload = response.is_a?(Hash) ? response : {}
  meta = payload["metadata"]
  { data: payload, metadata: meta.is_a?(Hash) ? meta : {} }
rescue StandardError => e
  {
    data: {},
    metadata: {
      reason: "static_frame_intelligence_error",
      error_class: e.class.name,
      error_message: e.message.to_s
    }
  }
end
-
-
# Converts the static-frame microservice payload into the pipeline's
# detection-row shape (a single-element array), keeping only Hash rows and
# stringifying scalar lists. Empty/absent data yields no rows.
def detections_from_static_frame_intelligence(static_frame_intelligence:)
  payload = static_frame_intelligence[:data]
  payload = {} unless payload.is_a?(Hash)
  return [] if payload.empty?

  hash_rows = ->(key) { Array(payload[key]).select { |entry| entry.is_a?(Hash) } }
  strings   = ->(key) { Array(payload[key]).map(&:to_s) }

  [
    {
      faces: hash_rows.call("faces"),
      content_signals: strings.call("content_labels"),
      object_detections: hash_rows.call("object_detections"),
      scenes: hash_rows.call("scenes"),
      location_tags: strings.call("location_tags"),
      ocr_text: payload["ocr_text"].to_s,
      ocr_blocks: hash_rows.call("ocr_blocks"),
      mentions: strings.call("mentions"),
      hashtags: strings.call("hashtags"),
      profile_handles: strings.call("profile_handles")
    }
  ]
end
-
-
# Converts the whole-video intelligence payload into the pipeline's
# detection-row shape (single-element array). Unlike the static-frame
# variant there are no faces/location_tags fields here.
def detections_from_local_intelligence(local_video_intelligence:)
  payload = local_video_intelligence[:data]
  payload = {} unless payload.is_a?(Hash)
  return [] if payload.empty?

  hash_rows = ->(key) { Array(payload[key]).select { |entry| entry.is_a?(Hash) } }
  strings   = ->(key) { Array(payload[key]).map(&:to_s) }

  [
    {
      content_signals: strings.call("content_labels"),
      object_detections: hash_rows.call("object_detections"),
      scenes: hash_rows.call("scenes"),
      ocr_text: payload["ocr_text"].to_s,
      ocr_blocks: hash_rows.call("ocr_blocks"),
      mentions: strings.call("mentions"),
      hashtags: strings.call("hashtags"),
      profile_handles: strings.call("profile_handles")
    }
  ]
end
-
-
# Builds a short human-readable summary sentence list from processing
# facts. Returns nil (via +presence+) when nothing noteworthy was found.
def context_summary(processing_mode:, duration_seconds:, topics:, transcript:)
  sentences = []
  sentences << "Static visual video detected and routed through image-style analysis." if processing_mode.to_s == "static_image"

  duration = duration_seconds.to_f
  sentences << "Duration #{duration.round(2)}s." if duration.positive?
  sentences << "Topics: #{topics.first(6).join(', ')}." if topics.any?
  sentences << "Audio transcript: #{truncate_text(transcript, max: 140)}." if transcript.to_s.present?

  sentences.join(" ").strip.presence
end
-
-
# Stringifies and strips each value, drops blanks, de-duplicates while
# preserving first-seen order, and caps the result at +limit+ entries.
def normalize_string_array(values, limit:)
  kept = []
  Array(values).each do |value|
    cleaned = value.to_s.strip
    next if cleaned.blank?

    kept << cleaned unless kept.include?(cleaned)
  end
  kept.first(limit)
end
-
-
# Keeps only Hash entries (Array() tolerates nil/scalars) and caps the
# list at +limit+ items.
def normalize_hash_array(values, limit:)
  Array(values).grep(Hash).first(limit)
end
-
-
# Truncates +value+ (stringified and stripped) to at most +max+ characters,
# appending "..." when truncation occurred.
#
# Fix: the original compared character length (String#length) but sliced
# with String#byteslice, which can cut a multibyte character in half and
# produce invalid UTF-8. Slice by characters to stay consistent with the
# length check.
def truncate_text(value, max:)
  text = value.to_s.strip
  return text if text.length <= max

  "#{text[0, max]}..."
end
-
-
# Placeholder payload used when audio extraction is skipped or fails.
# Error keys are dropped from metadata (via compact) when not supplied.
def empty_audio(reason:, error_class: nil, error_message: nil)
  details = {
    source: "video_audio_extraction",
    reason: reason.to_s,
    error_class: error_class.to_s.presence,
    error_message: error_message.to_s.presence
  }
  { audio_bytes: nil, content_type: nil, metadata: details.compact }
end
-
-
# Placeholder payload used when transcription is skipped or fails.
# Error keys are dropped from metadata (via compact) when not supplied.
def empty_transcript(reason:, error_class: nil, error_message: nil)
  details = {
    source: "speech_transcription",
    reason: reason.to_s,
    error_class: error_class.to_s.presence,
    error_message: error_message.to_s.presence
  }
  { transcript: nil, metadata: details.compact }
end
-
-
# Full-shape "nothing extracted" result for a video we refused to process.
# Mirrors the happy-path result schema so downstream consumers never need
# per-key nil checks; only metadata explains why processing was skipped.
def skipped_result(reason:, byte_size: nil, max_bytes: nil, error_class: nil, error_message: nil)
  details = {
    reason: reason.to_s,
    byte_size: byte_size,
    max_bytes: max_bytes,
    error_class: error_class.to_s.presence,
    error_message: error_message.to_s.presence
  }.compact

  {
    skipped: true,
    processing_mode: "dynamic_video",
    static: false,
    semantic_route: "video",
    duration_seconds: nil,
    has_audio: nil,
    transcript: nil,
    topics: [],
    objects: [],
    scenes: [],
    hashtags: [],
    mentions: [],
    profile_handles: [],
    ocr_text: nil,
    ocr_blocks: [],
    context_summary: nil,
    metadata: details
  }
end
-
end
-
# Generates short comment suggestions matched to the profile's persona tone
# and the analyzed content's sentiment.
class ResponseGenerationService
  def initialize(personalization_engine: PersonalizationEngine.new)
    @personalization_engine = personalization_engine
  end

  # Returns up to +max_suggestions+ (clamped to 1..10) unique, stripped
  # suggestion strings. A "{topic}" placeholder is filled with the first
  # detected topic, or removed together with its leading space when no
  # topic is available.
  def generate(profile:, content_understanding:, max_suggestions: 5)
    persona = @personalization_engine.build(profile: profile)
    topics = Array(content_understanding[:topics]).first(5)
    sentiment = content_understanding[:sentiment].to_s
    lead_topic = topics.first

    rendered = base_templates(tone: persona[:tone], sentiment: sentiment).map do |template|
      text =
        if lead_topic.present?
          template.gsub("{topic}", lead_topic)
        else
          template.gsub(" {topic}", "")
        end
      text.strip
    end

    rendered.uniq.first(max_suggestions.to_i.clamp(1, 10))
  end

  private

  # Tone takes precedence; sentiment nudges toward empathy/optimism.
  def base_templates(tone:, sentiment:)
    if tone == "empathetic" || sentiment == "negative"
      empathetic_templates
    elsif tone == "optimistic" || sentiment == "positive"
      optimistic_templates
    else
      neutral_templates
    end
  end

  def optimistic_templates
    [
      "Love this energy around {topic}.",
      "This looks amazing, especially the {topic} moment.",
      "Big fan of this one, great vibe.",
      "This is strong content. Keep it coming.",
      "Great share, this feels really authentic."
    ]
  end

  def empathetic_templates
    [
      "Appreciate you sharing this.",
      "Sending support your way.",
      "This felt real and honest.",
      "Thanks for posting this perspective.",
      "Rooting for you."
    ]
  end

  def neutral_templates
    [
      "Nice story update.",
      "This was a good share.",
      "Loved the {topic} angle here.",
      "Clean and engaging post.",
      "Great context in this one."
    ]
  end
end
-
require "open3"
-
require "shellwords"
-
require "tempfile"
-
require "tmpdir"
-
require "net/http"
-
require "json"
-
-
# Transcribes story audio to text. Tries the local AI microservice first
# (when enabled), then falls back to a locally installed Whisper CLI.
# All failures degrade to a nil-transcript result with a reason.
class SpeechTranscriptionService
  def initialize(whisper_bin: ENV.fetch("WHISPER_BIN", "whisper"), whisper_model: ENV.fetch("WHISPER_MODEL", "base"), use_microservice: ENV.fetch("USE_LOCAL_AI_MICROSERVICE", "true") == "true")
    @whisper_bin = whisper_bin.to_s
    @whisper_model = whisper_model.to_s
    @use_microservice = use_microservice
    @microservice_url = ENV.fetch("LOCAL_AI_SERVICE_URL", "http://localhost:8000")
  end

  # @param audio_bytes [String] raw (binary) audio payload, e.g. WAV bytes
  # @param story_id [String] used for temp-file naming / tracing only
  # @return [Hash] { transcript: String|nil, metadata: Hash }
  def transcribe(audio_bytes:, story_id:)
    return empty_result("audio_bytes_missing") if audio_bytes.blank?

    # Try microservice first if enabled; fall through on any failure.
    if @use_microservice
      microservice_result = transcribe_with_microservice(audio_bytes, story_id)
      return microservice_result if microservice_result[:transcript].present?
    end

    # Fallback to local Whisper binary.
    return empty_result("whisper_missing") unless command_available?(@whisper_bin)

    transcribe_with_binary(audio_bytes, story_id)
  rescue StandardError => e
    empty_result("transcription_error", stderr: e.message)
  end

  private

  # POSTs the audio to the microservice as multipart/form-data.
  #
  # Fixes over the previous version:
  # * The request body is assembled in BINARY encoding. Joining UTF-8
  #   literals with raw audio bytes raised Encoding::CompatibilityError for
  #   any real (non-ASCII) payload, silently disabling this path.
  # * Dropped a Tempfile that was written but never read.
  # * `body["error"]` instead of single-key `dig`.
  def transcribe_with_microservice(audio_bytes, _story_id)
    uri = URI.parse("#{@microservice_url}/transcribe/audio")
    boundary = "----WebKitFormBoundary#{SecureRandom.hex(16)}"

    # Build the multipart body on a binary buffer so appending the raw
    # audio bytes can never trigger an encoding incompatibility.
    body = +"".b
    body << "--#{boundary}\r\n"
    body << "Content-Disposition: form-data; name=\"file\"; filename=\"audio.wav\"\r\n"
    body << "Content-Type: application/octet-stream\r\n\r\n"
    body << audio_bytes.b
    body << "\r\n"
    body << "--#{boundary}\r\n"
    body << "Content-Disposition: form-data; name=\"model\"\r\n\r\n"
    body << @whisper_model
    body << "\r\n"
    body << "--#{boundary}--\r\n"

    http = Net::HTTP.new(uri.host, uri.port)
    http.open_timeout = 30
    http.read_timeout = 120

    request = Net::HTTP::Post.new(uri.request_uri)
    request["Content-Type"] = "multipart/form-data; boundary=#{boundary}"
    request["Accept"] = "application/json"
    request.body = body

    response = http.request(request)
    parsed = JSON.parse(response.body.to_s.presence || "{}")

    if response.is_a?(Net::HTTPSuccess) && parsed["success"]
      {
        transcript: parsed["transcript"],
        metadata: {
          source: "local_microservice",
          model: @whisper_model,
          confidence: parsed.dig("metadata", "confidence")
        }
      }
    else
      empty_result("microservice_error", stderr: parsed["error"])
    end
  rescue StandardError => e
    empty_result("microservice_error", stderr: e.message)
  end

  # Shells out to the Whisper CLI and reads the generated .txt transcript.
  def transcribe_with_binary(audio_bytes, story_id)
    Tempfile.create([ "story_audio_#{story_id}", ".wav" ]) do |audio_file|
      audio_file.binmode
      audio_file.write(audio_bytes)
      audio_file.flush

      Dir.mktmpdir("story_whisper_#{story_id}_") do |output_dir|
        cmd = [
          @whisper_bin,
          audio_file.path,
          "--model", @whisper_model,
          "--output_format", "txt",
          "--output_dir", output_dir,
          "--task", "transcribe"
        ]
        _stdout, stderr, status = Open3.capture3(*cmd)
        return empty_result("whisper_failed", stderr: stderr.to_s) unless status.success?

        txt_path = Dir[File.join(output_dir, "*.txt")].first
        return empty_result("transcript_missing") if txt_path.blank?

        text = File.read(txt_path).to_s.strip
        return empty_result("transcript_empty") if text.blank?

        {
          transcript: text,
          metadata: {
            source: "local_whisper_binary",
            model: @whisper_model
          }
        }
      end
    end
  end

  # True when +command+ resolves on PATH (POSIX `command -v`).
  def command_available?(command)
    system("command -v #{Shellwords.escape(command)} >/dev/null 2>&1")
  end

  def empty_result(reason, stderr: nil)
    {
      transcript: nil,
      metadata: {
        source: "local_whisper",
        reason: reason,
        stderr: stderr.to_s.presence
      }.compact
    }
  end
end
-
module StoryArchive
  # Resolves which image URL should represent a story/event in list views,
  # and classifies "static" videos (clips processed as a single frame).
  class MediaPreviewResolver
    class << self
      # True when any recorded processing marker says this video was
      # treated as one static frame.
      def static_video_preview?(metadata:)
        data = metadata_hash(metadata)
        processing = metadata_hash(data["processing_metadata"])
        frame_change = metadata_hash(processing["frame_change_detection"])
        local_intelligence = metadata_hash(data["local_story_intelligence"])

        return true if processing["source"].to_s == "video_static_single_frame"

        [ frame_change["processing_mode"], local_intelligence["video_processing_mode"] ]
          .any? { |mode| mode.to_s == "static_image" }
      end

      # Prefers an attached ActiveStorage preview over any URL recorded in
      # metadata.
      def preferred_preview_image_url(event:, metadata:)
        preview_image_path(event) || metadata_preview_image_url(metadata: metadata)
      end

      # Pulls a usable image URL straight from metadata: the top-level
      # image_url first, otherwise the first carousel entry carrying one.
      def metadata_preview_image_url(metadata:)
        data = metadata_hash(metadata)
        top_level = data["image_url"].to_s.presence
        return top_level if top_level.present?

        entry = Array(data["carousel_media"]).find do |item|
          item.is_a?(Hash) && item["image_url"].to_s.present?
        end
        entry.is_a?(Hash) ? entry["image_url"].to_s.presence : nil
      end

      private

      # ActiveStorage blob path for the attached preview image; nil when
      # absent or when URL generation fails.
      def preview_image_path(event)
        return nil unless event.respond_to?(:preview_image)
        return nil unless event.preview_image.attached?

        Rails.application.routes.url_helpers.rails_blob_path(event.preview_image, only_path: true)
      rescue StandardError
        nil
      end

      # Defensive cast: any non-Hash becomes an empty Hash.
      def metadata_hash(value)
        value.is_a?(Hash) ? value : {}
      end
    end
  end
end
-
# Condenses raw per-frame detection rows plus an optional transcript into a
# single normalized content-understanding hash: capped entity lists, merged
# OCR text, a crude lexicon sentiment, and topic candidates.
class StoryContentUnderstandingService
  def build(media_type:, detections:, transcript_text: nil)
    rows = Array(detections)

    face_total = rows.sum { |row| Array(row[:faces]).length }

    # De-duplicated, newline-joined OCR text across all rows (nil if none).
    ocr_text = rows
      .map { |row| row[:ocr_text].to_s.strip }
      .reject(&:blank?)
      .uniq
      .join("\n")
      .strip
      .presence

    locations = clean_strings(rows.flat_map { |row| Array(row[:location_tags]) }).uniq
    objects = clean_strings(rows.flat_map { |row| Array(row[:content_signals]) })
    object_detections = hash_entries(rows, :object_detections)
    scenes = hash_entries(rows, :scenes)
    ocr_blocks = hash_entries(rows, :ocr_blocks)
    mentions = lowered(rows, :mentions)
    hashtags = lowered(rows, :hashtags)
    profile_handles = lowered(rows, :profile_handles)

    combined_text = [ ocr_text, transcript_text.to_s ].compact.join("\n")

    {
      objects: objects.first(60),
      faces: face_total,
      locations: locations.first(30),
      ocr_text: ocr_text,
      ocr_blocks: ocr_blocks.first(120),
      transcript: transcript_text.to_s.presence,
      sentiment: infer_sentiment(combined_text),
      topics: infer_topics(objects: objects, hashtags: hashtags, transcript: transcript_text, ocr_text: ocr_text).first(30),
      mentions: mentions.first(40),
      hashtags: hashtags.first(40),
      profile_handles: profile_handles.first(40),
      scenes: scenes.first(80),
      object_detections: object_detections.first(120),
      media_type: media_type.to_s
    }
  end

  private

  POSITIVE_TERMS = %w[happy great love awesome excited win winning strong proud blessed amazing].freeze
  NEGATIVE_TERMS = %w[sad angry upset bad pain tired depressed sick fail failing stressed].freeze
  STOPWORDS = %w[the a an and or to of in on at for with is are this that from your my our they].freeze

  # Stringify + strip each value, discarding blanks (no de-dup here).
  def clean_strings(values)
    values.map { |value| value.to_s.strip }.reject(&:blank?)
  end

  # Flatten a key across rows, keeping only Hash entries.
  def hash_entries(rows, key)
    rows.flat_map { |row| Array(row[key]) }.select { |entry| entry.is_a?(Hash) }
  end

  # Flatten a key across rows as unique, lowercased strings.
  def lowered(rows, key)
    rows.flat_map { |row| Array(row[key]) }.map { |value| value.to_s.downcase }.uniq
  end

  # Lexicon vote: more positive tokens than negative -> "positive", etc.
  def infer_sentiment(text)
    tokens = tokenize(text)
    return "neutral" if tokens.empty?

    score = tokens.reduce(0) do |acc, token|
      next acc + 1 if POSITIVE_TERMS.include?(token)
      next acc - 1 if NEGATIVE_TERMS.include?(token)

      acc
    end
    if score.positive?
      "positive"
    elsif score.negative?
      "negative"
    else
      "neutral"
    end
  end

  # Topic candidates: detected labels, bare hashtag bodies, and free-text
  # tokens minus stopwords — de-duplicated, first-appearance order kept.
  def infer_topics(objects:, hashtags:, transcript:, ocr_text:)
    label_topics = objects.map(&:downcase)
    hashtag_topics = hashtags.map { |tag| tag.to_s.sub(/^#/, "") }.reject(&:blank?)
    text_topics = tokenize([ transcript, ocr_text ].join(" ")).reject { |token| STOPWORDS.include?(token) }
    (label_topics + hashtag_topics + text_topics).reject(&:blank?).uniq
  end

  def tokenize(text)
    text.to_s.downcase.scan(/[a-z0-9_]+/)
  end
end
-
# Upserts an InstagramStory row for a scraped story payload, optionally
# attaching the raw media bytes and enqueueing the processing job.
class StoryIngestionService
  def initialize(account:, profile:, enqueue_processing: true)
    @account = account
    @profile = profile
    # When false, the caller takes responsibility for scheduling
    # StoryProcessingJob themselves.
    @enqueue_processing = enqueue_processing
  end

  # Idempotent ingest keyed on (profile, story_id). Re-ingesting an existing
  # story refreshes its attributes/metadata; +force_reprocess+ additionally
  # resets processing state so the pipeline runs again.
  #
  # @raise [ArgumentError] when story[:story_id] is blank
  # @return [InstagramStory] the persisted record
  def ingest!(story:, source_event: nil, bytes: nil, content_type: nil, filename: nil, force_reprocess: false)
    story_id = story[:story_id].to_s.strip
    raise ArgumentError, "story_id is required" if story_id.blank?

    record = InstagramStory.find_or_initialize_by(instagram_profile: @profile, story_id: story_id)
    # Captured BEFORE save so metadata can note that a duplicate was deduped.
    existing_story_record = record.persisted?
    record.instagram_account = @account
    record.source_event = source_event if source_event.present?
    record.media_type = story[:media_type].to_s.presence || infer_media_type(content_type: content_type)
    record.media_url = story[:media_url].to_s.presence
    record.image_url = story[:image_url].to_s.presence
    record.video_url = story[:video_url].to_s.presence
    record.taken_at = story[:taken_at] if story[:taken_at].present?
    record.expires_at = story[:expiring_at] if story[:expiring_at].present?
    record.duration_seconds = extract_duration_seconds(story: story, current: record.duration_seconds)
    record.metadata = merged_metadata(
      existing: record.metadata,
      story: story,
      filename: filename,
      content_type: content_type,
      media_bytes: bytes&.bytesize,
      existing_story_record: existing_story_record
    )

    # New or force-reprocessed stories go back to a clean "pending" state.
    if record.new_record? || force_reprocess
      record.processed = false
      record.processing_status = "pending"
      record.processed_at = nil
    end

    record.save!
    # Attachment and enqueue both need a persisted record, hence after save!.
    attach_media!(record: record, bytes: bytes, content_type: content_type, filename: filename) if bytes.present?
    enqueue_processing!(record: record, force_reprocess: force_reprocess)
    record
  end

  private

  # Derives "video"/"image" from the MIME type; nil when indeterminate.
  def infer_media_type(content_type:)
    value = content_type.to_s.downcase
    return "video" if value.start_with?("video/")
    return "image" if value.start_with?("image/")

    nil
  end

  # Merges ingest bookkeeping into the existing metadata hash. Existing keys
  # of the same names are overwritten; everything else is preserved.
  def merged_metadata(existing:, story:, filename:, content_type:, media_bytes:, existing_story_record:)
    current = existing.is_a?(Hash) ? existing : {}
    current.merge(
      "story_payload" => {
        "caption" => story[:caption].to_s,
        "permalink" => story[:permalink].to_s
      },
      "media_filename" => filename.to_s,
      "media_content_type" => content_type.to_s,
      "media_bytes" => media_bytes.to_i,
      "duplicate_story_storage_prevented" => ActiveModel::Type::Boolean.new.cast(existing_story_record),
      "ingested_at" => Time.current.iso8601
    )
  end

  # First positive duration among the payload's candidate keys (falling back
  # to the currently stored value), rounded to 2 decimals; nil when none.
  def extract_duration_seconds(story:, current:)
    values = [
      story[:duration_seconds],
      story[:duration],
      story[:video_duration],
      current
    ]
    out = values.compact.map(&:to_f).find(&:positive?)
    out&.round(2)
  end

  # Attaches raw bytes once (never replaces an existing attachment).
  # Best-effort: attachment errors are swallowed so ingest still succeeds.
  def attach_media!(record:, bytes:, content_type:, filename:)
    return if record.media.attached?

    name = filename.to_s.presence || "story_#{record.story_id.parameterize}.bin"
    record.media.attach(io: StringIO.new(bytes), filename: name, content_type: content_type.to_s.presence || "application/octet-stream")
  rescue StandardError
    nil
  end

  # Schedules StoryProcessingJob unless disabled, already in-flight, or
  # already processed (and not forced).
  def enqueue_processing!(record:, force_reprocess:)
    return unless @enqueue_processing
    return if record.processing_status == "processing"
    return if record.processed? && !force_reprocess

    StoryProcessingJob.perform_later(instagram_story_id: record.id, force: force_reprocess)
  end
end
-
require "json"
-
require "net/http"
-
require "uri"
-
-
class StoryProcessingService
-
DEFAULT_MATCH_MIN_CONFIDENCE = ENV.fetch("STORY_FACE_MATCH_MIN_CONFIDENCE", "0.78").to_f
-
-
# Wires up the full story-processing pipeline via keyword-injected
# collaborators (all default to real service instances; tests can pass
# fakes).
#
# @param story [InstagramStory] the record to process
# @param force [Boolean-ish] re-process even if already processed
# @param match_min_confidence [Numeric, nil] minimum detector confidence for
#   a face to be eligible for identity matching; nil, negative, or
#   unparseable values fall back to DEFAULT_MATCH_MIN_CONFIDENCE.
def initialize(
  story:,
  force: false,
  face_detection_service: FaceDetectionService.new,
  face_embedding_service: FaceEmbeddingService.new,
  vector_matching_service: VectorMatchingService.new,
  user_profile_builder_service: UserProfileBuilderService.new,
  video_frame_extraction_service: VideoFrameExtractionService.new,
  video_audio_extraction_service: VideoAudioExtractionService.new,
  speech_transcription_service: SpeechTranscriptionService.new,
  video_metadata_service: VideoMetadataService.new,
  video_frame_change_detector_service: VideoFrameChangeDetectorService.new,
  content_understanding_service: StoryContentUnderstandingService.new,
  response_generation_service: ResponseGenerationService.new,
  face_identity_resolution_service: FaceIdentityResolutionService.new,
  match_min_confidence: nil
)
  @story = story
  # Cast accepts "true"/"1"/true/etc. uniformly.
  @force = ActiveModel::Type::Boolean.new.cast(force)
  @face_detection_service = face_detection_service
  @face_embedding_service = face_embedding_service
  @vector_matching_service = vector_matching_service
  @user_profile_builder_service = user_profile_builder_service
  @video_frame_extraction_service = video_frame_extraction_service
  @video_audio_extraction_service = video_audio_extraction_service
  @speech_transcription_service = speech_transcription_service
  @video_metadata_service = video_metadata_service
  @video_frame_change_detector_service = video_frame_change_detector_service
  @content_understanding_service = content_understanding_service
  @response_generation_service = response_generation_service
  @face_identity_resolution_service = face_identity_resolution_service
  # Sanitize the threshold: parse errors and negative values both fall back
  # to the environment-derived default.
  @match_min_confidence = begin
    value = match_min_confidence.nil? ? DEFAULT_MATCH_MIN_CONFIDENCE : match_min_confidence.to_f
    value.negative? ? DEFAULT_MATCH_MIN_CONFIDENCE : value
  rescue StandardError
    DEFAULT_MATCH_MIN_CONFIDENCE
  end
end
-
-
# Runs the end-to-end pipeline for one story: media load → face/content
# extraction → face persistence → content understanding → suggestion
# generation → metadata write-back → identity resolution → broadcast and
# profile refresh. Returns the updated story. On error the story is marked
# failed (fail_story!) and the exception is re-raised for the job backend.
def process!
  # Idempotence guard: skip unless forced.
  return @story if @story.processed? && !@force

  @story.update!(processing_status: "processing", processed: false)
  # A forced rerun starts from a clean slate of face rows.
  @story.instagram_story_faces.delete_all if @force

  media_payload = load_media_payload
  result =
    if media_payload[:media_type] == "video"
      process_video_story(media_payload)
    else
      process_image_story(media_payload)
    end

  persist_faces!(detected_faces: result[:faces], story_id: media_payload[:story_id], fallback_image_bytes: media_payload[:image_bytes])
  linked_face_count = @story.instagram_story_faces.where.not(instagram_story_person_id: nil).count
  unlinked_face_count = @story.instagram_story_faces.where(instagram_story_person_id: nil).count
  content_understanding = @content_understanding_service.build(
    media_type: media_payload[:media_type],
    detections: result[:detections],
    transcript_text: result[:transcript_text]
  )
  suggestions = @response_generation_service.generate(
    profile: @story.instagram_profile,
    content_understanding: content_understanding
  )

  # Merge pipeline outputs into the story's metadata blob; existing keys
  # of the same names are overwritten, everything else preserved.
  metadata = (@story.metadata.is_a?(Hash) ? @story.metadata : {}).merge(
    "ocr_text" => content_understanding[:ocr_text].to_s,
    "location_tags" => Array(content_understanding[:locations]).uniq,
    "content_signals" => Array(content_understanding[:objects]).uniq,
    "mentions" => Array(content_understanding[:mentions]).uniq,
    "hashtags" => Array(content_understanding[:hashtags]).uniq,
    "transcript" => content_understanding[:transcript].to_s.presence,
    "face_count" => result[:faces].length,
    "linked_face_count" => linked_face_count,
    "unlinked_face_count" => unlinked_face_count,
    "min_match_confidence" => @match_min_confidence.round(3),
    "processing_path" => media_payload[:media_type],
    "generated_response_suggestions" => suggestions,
    "content_understanding" => content_understanding,
    "last_processed_at" => Time.current.iso8601,
    "pipeline_version" => "story_processing_v2",
    "processing_metadata" => result[:processing_metadata]
  )

  @story.update!(
    processed: true,
    processing_status: "processed",
    processed_at: Time.current,
    duration_seconds: result[:duration_seconds] || @story.duration_seconds,
    metadata: metadata
  )

  # Second pass: work out who appears in the story using mentions, profile
  # handles, and @-handles scraped out of the OCR text.
  identity_resolution = @face_identity_resolution_service.resolve_for_story!(
    story: @story,
    extracted_usernames: (
      Array(content_understanding[:mentions]) +
      Array(content_understanding[:profile_handles]) +
      content_understanding[:ocr_text].to_s.scan(/@[a-zA-Z0-9._]{2,30}/)
    ),
    content_summary: content_understanding
  )
  if identity_resolution.is_a?(Hash) && identity_resolution[:summary].is_a?(Hash)
    story_meta = @story.metadata.is_a?(Hash) ? @story.metadata.deep_dup : {}
    story_meta["face_identity"] = identity_resolution[:summary]
    story_meta["participant_summary"] = identity_resolution[:summary][:participant_summary_text].to_s
    @story.update!(metadata: story_meta)
  end

  InstagramProfileEvent.broadcast_story_archive_refresh!(account: @story.instagram_account)

  @user_profile_builder_service.refresh!(profile: @story.instagram_profile)
  @story
rescue StandardError => e
  # Record the failure, then propagate so the job backend can retry/report.
  fail_story!(error_message: e.message)
  raise
end
-
-
private
-
-
# Single-frame path: runs face detection once over the image bytes and
# tags every face with frame 0 / t=0.0 so image and video faces share one
# schema downstream.
def process_image_story(media_payload)
  detection = @face_detection_service.detect(media_payload: media_payload)
  tagged_faces = Array(detection[:faces]).map do |face|
    face.merge(image_bytes: media_payload[:image_bytes], frame_index: 0, timestamp_seconds: 0.0)
  end

  {
    detections: [ detection ],
    faces: tagged_faces,
    transcript_text: nil, # images carry no audio
    duration_seconds: nil,
    processing_metadata: {
      source: "image_single_frame",
      detection_metadata: detection[:metadata]
    }
  }
end
-
-
# Video path. First classifies the clip: visually-static videos are
# rerouted through the image pipeline using one representative frame;
# dynamic videos go through probe → frame sampling → per-frame face
# detection → audio extraction → transcription.
def process_video_story(media_payload)
  mode = @video_frame_change_detector_service.classify(
    video_bytes: media_payload[:bytes],
    reference_id: media_payload[:story_id],
    content_type: media_payload[:content_type]
  )
  if mode[:processing_mode].to_s == "static_image" && mode[:frame_bytes].present?
    # Static clip: reuse the image pipeline on the extracted frame, then
    # re-label the result so metadata records the video origin.
    result = process_image_story(
      media_payload.merge(
        media_type: "image",
        image_bytes: mode[:frame_bytes]
      )
    )
    result[:duration_seconds] = mode[:duration_seconds] if mode[:duration_seconds].to_f.positive?
    result[:processing_metadata] = (result[:processing_metadata].is_a?(Hash) ? result[:processing_metadata] : {}).merge(
      source: "video_static_single_frame",
      frame_change_detection: mode[:metadata]
    )
    return result
  end

  # Reuse probe data captured during classification when present; only
  # probe the raw bytes again when the classifier produced none.
  probe =
    if mode[:duration_seconds].to_f.positive? || mode.dig(:metadata, :video_probe).is_a?(Hash)
      {
        duration_seconds: mode[:duration_seconds],
        metadata: mode.dig(:metadata, :video_probe).is_a?(Hash) ? mode.dig(:metadata, :video_probe) : {}
      }
    else
      @video_metadata_service.probe(
        video_bytes: media_payload[:bytes],
        story_id: media_payload[:story_id],
        content_type: media_payload[:content_type]
      )
    end
  frames_result = @video_frame_extraction_service.extract(
    video_bytes: media_payload[:bytes],
    story_id: media_payload[:story_id],
    content_type: media_payload[:content_type]
  )

  # Face detection runs per sampled frame; every face remembers its frame
  # index and timestamp for downstream observation signatures.
  detections = []
  faces = []
  Array(frames_result[:frames]).each do |frame|
    detection = @face_detection_service.detect(
      media_payload: {
        story_id: media_payload[:story_id],
        media_type: "image",
        image_bytes: frame[:image_bytes]
      }
    )
    detections << detection.merge(frame_index: frame[:index], timestamp_seconds: frame[:timestamp_seconds])

    Array(detection[:faces]).each do |face|
      faces << face.merge(
        image_bytes: frame[:image_bytes],
        frame_index: frame[:index],
        timestamp_seconds: frame[:timestamp_seconds]
      )
    end
  end

  audio = @video_audio_extraction_service.extract(
    video_bytes: media_payload[:bytes],
    story_id: media_payload[:story_id],
    content_type: media_payload[:content_type]
  )
  transcript = @speech_transcription_service.transcribe(
    audio_bytes: audio[:audio_bytes],
    story_id: media_payload[:story_id]
  )

  {
    detections: detections,
    faces: faces,
    transcript_text: transcript[:transcript],
    duration_seconds: probe[:duration_seconds],
    processing_metadata: {
      source: "video_multistage",
      video_probe: probe[:metadata],
      frame_change_detection: mode[:metadata],
      frame_extraction: frames_result[:metadata],
      audio_extraction: audio[:metadata],
      transcription: transcript[:metadata]
    }
  }
end
-
-
# Persists each detected face as an InstagramStoryFace row. Faces that are
# low-confidence, missing image bytes, or missing an embedding are stored
# unlinked (with the skip reason); the remainder are embedded and matched
# to (or create) a person via the vector-matching service.
def persist_faces!(detected_faces:, story_id:, fallback_image_bytes:)
  Array(detected_faces).each do |face|
    observation_signature = face_observation_signature(story_id: story_id, face: face)
    confidence = face[:confidence].to_f
    unless linkable_face_confidence?(confidence)
      persist_unlinked_story_face!(
        face: face,
        observation_signature: observation_signature,
        reason: "low_confidence"
      )
      next
    end

    # Embedding needs pixels: prefer the face's own crop, else the whole
    # source image.
    face_image_bytes = face[:image_bytes].presence || fallback_image_bytes
    if face_image_bytes.blank?
      persist_unlinked_story_face!(
        face: face,
        observation_signature: observation_signature,
        reason: "face_image_missing"
      )
      next
    end

    embedding_payload = @face_embedding_service.embed(
      media_payload: {
        story_id: story_id,
        media_type: "image",
        image_bytes: face_image_bytes
      },
      face: face
    )
    vector = Array(embedding_payload[:vector]).map(&:to_f)
    if vector.empty?
      persist_unlinked_story_face!(
        face: face,
        observation_signature: observation_signature,
        reason: "embedding_unavailable"
      )
      next
    end

    match = @vector_matching_service.match_or_create!(
      account: @story.instagram_account,
      profile: @story.instagram_profile,
      embedding: vector,
      occurred_at: @story.taken_at || Time.current,
      observation_signature: observation_signature
    )
    # Fold this observation's demographic cues into the matched person.
    update_person_face_attributes!(person: match[:person], face: face)

    attrs = {
      instagram_story_person: match[:person],
      role: match[:role].to_s.presence || "unknown",
      detector_confidence: face[:confidence].to_f,
      match_similarity: match[:similarity],
      embedding_version: embedding_payload[:version].to_s,
      embedding: vector,
      bounding_box: face[:bounding_box],
      metadata: story_face_metadata(
        face: face,
        observation_signature: observation_signature,
        link_status: "matched"
      )
    }
    # Only populate the pgvector column when the schema has it.
    attrs[:embedding_vector] = vector if InstagramStoryFace.column_names.include?("embedding_vector")
    @story.instagram_story_faces.create!(attrs)
  end
end
-
-
# Deterministic identifier for one face sighting: story id, frame index,
# timestamp (ms precision), and the raw bounding-box corners, joined with
# colons. Missing pieces become empty segments.
def face_observation_signature(story_id:, face:)
  box = face[:bounding_box]
  box = {} unless box.is_a?(Hash)

  segments = [
    "story",
    story_id.to_s,
    face[:frame_index].to_i,
    face[:timestamp_seconds].to_f.round(3),
    box["x1"],
    box["y1"],
    box["x2"],
    box["y2"]
  ]
  segments.map(&:to_s).join(":")
end
-
-
# A face may be linked to a person only when the detector's confidence
# meets the configured minimum threshold.
def linkable_face_confidence?(confidence)
  threshold = @match_min_confidence
  confidence.to_f >= threshold
end
-
-
# Records a face we detected but deliberately did NOT link to a person
# (low confidence, missing crop, no embedding, ...). Best-effort: DB errors
# are swallowed so one bad row never aborts the whole batch.
def persist_unlinked_story_face!(face:, observation_signature:, reason:)
  row_metadata = story_face_metadata(
    face: face,
    observation_signature: observation_signature,
    link_status: "unlinked",
    link_skip_reason: reason
  )
  @story.instagram_story_faces.create!(
    instagram_story_person: nil,
    role: "unknown",
    detector_confidence: face[:confidence].to_f,
    match_similarity: nil,
    embedding_version: nil,
    embedding: nil,
    bounding_box: face[:bounding_box],
    metadata: row_metadata
  )
rescue StandardError
  nil
end
-
-
# Per-face metadata stored on the join row; nil-valued keys are dropped.
# NOTE(review): gender_score is coerced with to_f, so it persists as 0.0
# even when the detector reported nothing — confirm that is intentional.
def story_face_metadata(face:, observation_signature:, link_status:, link_skip_reason: nil)
  fields = {
    frame_index: face[:frame_index],
    timestamp_seconds: face[:timestamp_seconds],
    landmarks: face[:landmarks],
    likelihoods: face[:likelihoods],
    age: face[:age],
    age_range: face[:age_range],
    gender: face[:gender],
    gender_score: face[:gender_score].to_f,
    observation_signature: observation_signature,
    link_status: link_status,
    link_skip_reason: link_skip_reason
  }
  fields.compact
end
-
-
# Resolves the story's raw media bytes: prefers the ActiveStorage
# attachment, falls back to downloading from the best recorded URL.
# Raises when neither source yields bytes.
def load_media_payload
  raw = nil
  mime = nil

  if @story.media.attached?
    raw = @story.media.download
    mime = @story.media.content_type.to_s
  end

  source_url = media_download_url
  if raw.blank? && source_url.present?
    raw = download_bytes!(source_url)
    mime = infer_content_type_from_url(source_url, fallback: mime)
  end

  raise "No media payload available for story_id=#{@story.story_id}" if raw.blank?

  kind = infer_media_type(
    story_media_type: @story.media_type,
    content_type: mime
  )

  {
    story_id: @story.story_id,
    media_type: kind,
    bytes: raw,
    content_type: mime,
    # image_bytes doubles as the face-crop fallback; only set for images.
    image_bytes: kind == "image" ? raw : nil
  }
end
-
-
# Best candidate URL for downloading the story media: videos prefer the
# video URL (then generic, then image); images prefer the image URL (then
# generic). Returns nil when every candidate is blank.
def media_download_url
  candidates =
    if @story.video?
      [ @story.video_url, @story.media_url, @story.image_url ]
    else
      [ @story.image_url, @story.media_url ]
    end
  candidates.map { |candidate| candidate.to_s.presence }.compact.first
end
-
-
# Classifies the payload as "video" or "image". The stored media_type wins;
# otherwise the MIME type decides; anything unknown defaults to "image".
def infer_media_type(story_media_type:, content_type:)
  declared_video = story_media_type.to_s == "video"
  mime_video = content_type.to_s.start_with?("video/")
  declared_video || mime_video ? "video" : "image"
end
-
-
# Guesses a MIME type from the URL's extension when no explicit fallback
# was supplied. Checks are substring-based (matching the original), in a
# fixed priority order; unknown URLs map to application/octet-stream.
def infer_content_type_from_url(url, fallback:)
  fallback_type = fallback.to_s
  return fallback_type if fallback_type.present?

  normalized = url.to_s.downcase
  mapping = [
    [ ".mp4", "video/mp4" ],
    [ ".mov", "video/quicktime" ],
    [ ".png", "image/png" ],
    [ ".webp", "image/webp" ],
    [ ".jpg", "image/jpeg" ],
    [ ".jpeg", "image/jpeg" ]
  ]
  hit = mapping.find { |extension, _type| normalized.include?(extension) }
  hit ? hit.last : "application/octet-stream"
end
-
-
# Fetches the media bytes over HTTP(S) with Instagram-friendly headers.
# Raises RuntimeError on non-HTTP(S) URLs and on non-2xx responses.
def download_bytes!(url)
  uri = URI.parse(url)
  raise "Invalid media URL" unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)

  request = Net::HTTP::Get.new(uri.request_uri)
  request["Accept"] = "*/*"
  # Instagram CDN endpoints expect a plausible referer.
  request["Referer"] = "https://www.instagram.com/"

  client = Net::HTTP.new(uri.host, uri.port)
  client.use_ssl = (uri.scheme == "https")
  client.open_timeout = 8
  client.read_timeout = 25

  response = client.request(request)
  raise "Media download failed: HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

  response.body.to_s
end
-
-
# Accumulates per-face demographic cues (gender, age range, numeric age)
# into person.metadata["face_attributes"]: rolling counts with a recomputed
# primary cue, plus a bounded window of age samples averaged into an
# estimate. Best-effort — any error is swallowed and nil returned.
def update_person_face_attributes!(person:, face:)
  return unless person

  metadata = person.metadata.is_a?(Hash) ? person.metadata.deep_dup : {}
  attrs = metadata["face_attributes"].is_a?(Hash) ? metadata["face_attributes"].deep_dup : {}

  gender_cue = face[:gender].to_s.strip.downcase
  unless gender_cue.empty?
    tallies = attrs["gender_counts"].is_a?(Hash) ? attrs["gender_counts"].deep_dup : {}
    tallies[gender_cue] = tallies[gender_cue].to_i + 1
    attrs["gender_counts"] = tallies
    attrs["primary_gender_cue"] = tallies.max_by { |_cue, count| count.to_i }&.first
  end

  range_cue = face[:age_range].to_s.strip
  unless range_cue.empty?
    tallies = attrs["age_range_counts"].is_a?(Hash) ? attrs["age_range_counts"].deep_dup : {}
    tallies[range_cue] = tallies[range_cue].to_i + 1
    attrs["age_range_counts"] = tallies
    attrs["primary_age_range"] = tallies.max_by { |_cue, count| count.to_i }&.first
  end

  age = face[:age].to_f
  if age.positive?
    # Keep at most 19 prior samples plus this one (rolling window of 20).
    window = Array(attrs["age_samples"]).map(&:to_f).first(19)
    window << age.round(1)
    attrs["age_samples"] = window
    attrs["age_estimate"] = (window.sum / window.length.to_f).round(1)
  end

  attrs["last_observed_at"] = Time.current.iso8601
  metadata["face_attributes"] = attrs
  # update_columns skips validations/callbacks: this is a high-frequency
  # metadata-only write.
  person.update_columns(metadata: metadata, updated_at: Time.current)
rescue StandardError
  nil
end
-
-
# Marks @story as failed, recording the error message and timestamp in its
# metadata, then broadcasts a story-archive refresh for the owning account.
# Best-effort: never raises.
def fail_story!(error_message:)
  base = @story.metadata.is_a?(Hash) ? @story.metadata : {}
  @story.update(
    processing_status: "failed",
    processed: false,
    metadata: base.merge(
      "processing_error" => error_message.to_s,
      "failed_at" => Time.current.iso8601
    )
  )
  InstagramProfileEvent.broadcast_story_archive_refresh!(account: @story.instagram_account)
rescue StandardError
  nil
end
-
end
-
# Rebuilds the persisted behavior profile for an Instagram profile from its
# processed stories plus linked story/post face records.
class UserProfileBuilderService
  # Recomputes activity histograms, content tallies, top co-appearing people
  # and an overall activity score, then upserts them onto the profile's
  # InstagramProfileBehaviorProfile record.
  # Returns the saved record, or nil when the profile has no processed stories.
  def refresh!(profile:)
    stories = profile.instagram_stories.processed.to_a
    return nil if stories.empty?

    # Tally buckets; Hash.new(0) gives a zero default per key.
    by_hour = Hash.new(0)
    by_weekday = Hash.new(0)
    location_counts = Hash.new(0)
    content_signal_counts = Hash.new(0)
    topic_counts = Hash.new(0)
    hashtag_counts = Hash.new(0)
    sentiment_counts = Hash.new(0)

    stories.each do |story|
      # Fall back to created_at when the story has no taken_at timestamp.
      timestamp = story.taken_at || story.created_at
      by_hour[timestamp.hour] += 1
      by_weekday[timestamp.wday] += 1

      metadata = story.metadata.is_a?(Hash) ? story.metadata : {}
      Array(metadata["location_tags"]).each { |tag| location_counts[tag.to_s] += 1 if tag.present? }
      Array(metadata["content_signals"]).each { |signal| content_signal_counts[signal.to_s] += 1 if signal.present? }
      understanding = metadata["content_understanding"].is_a?(Hash) ? metadata["content_understanding"] : {}
      Array(understanding["topics"]).each { |topic| topic_counts[topic.to_s] += 1 if topic.present? }
      Array(understanding["hashtags"]).each { |tag| hashtag_counts[tag.to_s] += 1 if tag.present? }
      sentiment = understanding["sentiment"].to_s.strip
      sentiment_counts[sentiment] += 1 if sentiment.present?
    end

    # Appearances per linked person from story faces...
    story_person_counts = InstagramStoryFace.joins(:instagram_story)
      .where(instagram_stories: { instagram_profile_id: profile.id })
      .where.not(instagram_story_person_id: nil)
      .group(:instagram_story_person_id)
      .count

    # ...and from post faces (both keyed by instagram_story_person_id).
    post_person_counts = InstagramPostFace.joins(:instagram_profile_post)
      .where(instagram_profile_posts: { instagram_profile_id: profile.id })
      .where.not(instagram_story_person_id: nil)
      .group(:instagram_story_person_id)
      .count

    # Sum both sources when a person appears in stories and posts.
    person_counts = story_person_counts.merge(post_person_counts) { |_person_id, left, right| left.to_i + right.to_i }

    # Top 10 people by total appearances; :label dropped when nil via compact.
    top_people = profile.instagram_story_people.where(id: person_counts.keys).map do |person|
      {
        person_id: person.id,
        role: person.role,
        label: person.label.to_s.presence,
        appearances: person_counts[person.id].to_i
      }.compact
    end.sort_by { |row| -row[:appearances] }.first(10)

    score = activity_score(
      stories_count: stories.length,
      active_hours_count: by_hour.keys.length,
      secondary_person_mentions: top_people.reject { |row| row[:role] == "primary_user" }.sum { |row| row[:appearances].to_i }
    )

    record = InstagramProfileBehaviorProfile.find_or_initialize_by(instagram_profile: profile)
    existing_summary = record.behavioral_summary.is_a?(Hash) ? record.behavioral_summary.deep_dup : {}
    existing_metadata = record.metadata.is_a?(Hash) ? record.metadata.deep_dup : {}

    summary = {
      posting_time_pattern: {
        hour_histogram: by_hour.sort.to_h,
        weekday_histogram: by_weekday.sort.to_h
      },
      common_locations: sort_top(location_counts),
      frequent_secondary_persons: top_people.reject { |row| row[:role] == "primary_user" },
      content_categories: sort_top(content_signal_counts),
      topic_clusters: sort_top(topic_counts),
      top_hashtags: sort_top(hashtag_counts),
      sentiment_trend: sort_top(sentiment_counts, limit: 5)
    }
    # Preserve identity-related sections written elsewhere so a refresh does
    # not erase them.
    summary["face_identity_profile"] = existing_summary["face_identity_profile"] if existing_summary["face_identity_profile"].is_a?(Hash)
    summary["related_individuals"] = Array(existing_summary["related_individuals"]) if existing_summary["related_individuals"].present?
    summary["known_username_matches"] = Array(existing_summary["known_username_matches"]) if existing_summary["known_username_matches"].present?

    record.activity_score = score
    record.behavioral_summary = summary
    record.metadata = existing_metadata.merge(
      stories_processed: stories.length,
      post_faces_processed: profile.instagram_post_faces.count,
      refreshed_at: Time.current.iso8601
    )
    record.save!
    record
  end

  private

  # Top `limit` entries of a tally hash, highest count first.
  def sort_top(count_hash, limit: 20)
    count_hash.sort_by { |_key, count| -count }.first(limit).to_h
  end

  # Blend of volume (stories/30), hour diversity (hours/24) and social signal
  # (secondary mentions/20), each capped at 1.0, averaged into a 0..1 score.
  def activity_score(stories_count:, active_hours_count:, secondary_person_mentions:)
    volume = [ stories_count.to_f / 30.0, 1.0 ].min
    hourly_diversity = [ active_hours_count.to_f / 24.0, 1.0 ].min
    social = [ secondary_person_mentions.to_f / 20.0, 1.0 ].min
    ((volume + hourly_diversity + social) / 3.0).round(4)
  end
end
-
# Matches face embeddings against a profile's known people and maintains each
# person's canonical embedding. Prefers a pgvector nearest-neighbour query
# when available, falling back to in-Ruby cosine similarity.
class VectorMatchingService
  # Minimum cosine similarity for an embedding to count as an existing person.
  DEFAULT_THRESHOLD = 0.85

  def initialize(threshold: nil)
    @threshold = threshold.to_f.positive? ? threshold.to_f : DEFAULT_THRESHOLD
  end

  # Finds the closest person for `embedding`; on a match at/above the
  # threshold records the observation, otherwise creates a new
  # "secondary_person" seeded with this vector.
  # Returns { person:, matched:, similarity:, role:, observation_recorded: }.
  # Raises ArgumentError if the embedding normalizes to an empty vector.
  def match_or_create!(account:, profile:, embedding:, occurred_at: Time.current, observation_signature: nil)
    vector = normalize(embedding)
    raise ArgumentError, "embedding vector is required" if vector.empty?

    best = best_match(profile: profile, vector: vector)
    if best && best[:similarity] >= @threshold
      person = best[:person]
      observation = upsert_person_embedding!(
        person: person,
        vector: vector,
        occurred_at: occurred_at,
        observation_signature: observation_signature
      )
      role = person.role == "primary_user" ? "primary_user" : "secondary_person"
      return {
        person: person,
        matched: true,
        similarity: best[:similarity],
        role: role,
        observation_recorded: observation[:recorded]
      }
    end

    # No close match: create a new auto-clustered secondary person.
    normalized_signature = normalize_observation_signature(observation_signature)
    metadata = { source: "auto_cluster" }
    if normalized_signature.present?
      metadata["observation_signatures"] = [ normalized_signature ]
      metadata["observation_signatures_count"] = 1
      metadata["last_observation_signature"] = normalized_signature
    end

    attrs = {
      instagram_account: account,
      instagram_profile: profile,
      role: "secondary_person",
      first_seen_at: occurred_at,
      last_seen_at: occurred_at,
      appearance_count: 1,
      canonical_embedding: vector,
      metadata: metadata
    }
    attrs[:canonical_embedding_vector] = vector if pgvector_column_available?
    person = InstagramStoryPerson.create!(attrs)
    person.sync_identity_confidence!

    {
      person: person,
      matched: false,
      similarity: best&.dig(:similarity),
      role: person.role,
      observation_recorded: true
    }
  end

  # Creates or refreshes the single "primary_user" person for a profile,
  # overwriting its canonical embedding with the (normalized) vector.
  # Raises ArgumentError if the embedding normalizes to an empty vector.
  def upsert_primary_person!(account:, profile:, embedding:, occurred_at: Time.current, label: nil)
    vector = normalize(embedding)
    raise ArgumentError, "embedding vector is required" if vector.empty?

    person = InstagramStoryPerson.find_or_initialize_by(
      instagram_account: account,
      instagram_profile: profile,
      role: "primary_user"
    )
    person.label = label if label.present?
    person.first_seen_at ||= occurred_at
    person.last_seen_at = [ person.last_seen_at, occurred_at ].compact.max
    person.appearance_count = [ person.appearance_count.to_i, 1 ].max
    person.canonical_embedding = vector
    person.canonical_embedding_vector = vector if person.respond_to?(:canonical_embedding_vector=)
    person.metadata = (person.metadata.is_a?(Hash) ? person.metadata : {}).merge("source" => "primary_seed")
    person.save!
    person.sync_identity_confidence!
    person
  end

  private

  # Returns { person:, similarity: } for the closest matchable person, or nil.
  def best_match(profile:, vector:)
    if pgvector_enabled?
      # `<=>` is pgvector's cosine-distance operator; similarity = 1 - distance.
      # Interpolating vector_sql is safe here: vector_literal emits only
      # "%.8f"-formatted floats, never user-supplied text.
      vector_sql = vector_literal(vector)
      query = profile.instagram_story_people.where.not(canonical_embedding_vector: nil)
      return nil unless query.exists?

      # Take the 25 nearest and pick the first one still eligible for matching.
      person = query
        .select(Arel.sql("instagram_story_people.*, (1 - (canonical_embedding_vector <=> '#{vector_sql}'::vector)) AS similarity_score"))
        .order(Arel.sql("canonical_embedding_vector <=> '#{vector_sql}'::vector"))
        .limit(25)
        .to_a
        .find(&:active_for_matching?)
      return nil unless person

      return { person: person, similarity: person.attributes["similarity_score"].to_f }
    end

    # Fallback: brute-force cosine similarity in Ruby over all candidates.
    candidates = profile.instagram_story_people.where.not(canonical_embedding: nil).to_a.select(&:active_for_matching?)
    return nil if candidates.empty?

    candidates.map do |person|
      other = normalize(person.canonical_embedding)
      # Skip persons whose stored embedding has a different dimensionality.
      next nil if other.length != vector.length

      { person: person, similarity: cosine_similarity(vector, other) }
    end.compact.max_by { |item| item[:similarity] }
  end

  # Folds a new observation into the person's canonical embedding as a running
  # mean (then re-normalized) and tracks observation signatures so the same
  # observation is never double-counted. Returns { recorded:, duplicate: }.
  def upsert_person_embedding!(person:, vector:, occurred_at:, observation_signature:)
    current_count = person.appearance_count.to_i
    current = normalize(person.canonical_embedding)
    metadata = person.metadata.is_a?(Hash) ? person.metadata.deep_dup : {}

    normalized_signature = normalize_observation_signature(observation_signature)
    known_signatures = Array(metadata["observation_signatures"]).map(&:to_s).reject(&:blank?)
    duplicate_observation = normalized_signature.present? && known_signatures.include?(normalized_signature)

    if duplicate_observation
      # Already counted: only widen the seen-at window.
      attrs = {
        first_seen_at: person.first_seen_at || occurred_at,
        last_seen_at: [ person.last_seen_at, occurred_at ].compact.max
      }
      person.update!(attrs)
      return { recorded: false, duplicate: true }
    end

    # Running mean of all observed vectors, re-normalized; falls back to the
    # new vector when dimensions differ or there is no prior observation.
    updated_vector = if current.length == vector.length && current_count.positive?
      merged = current.each_with_index.map do |value, idx|
        ((value * current_count) + vector[idx]) / (current_count + 1)
      end
      normalize(merged)
    else
      vector
    end

    attrs = {
      canonical_embedding: updated_vector,
      appearance_count: current_count + 1,
      first_seen_at: person.first_seen_at || occurred_at,
      last_seen_at: [ person.last_seen_at, occurred_at ].compact.max
    }

    if normalized_signature.present?
      # Keep a bounded history (most recent 400) of observation signatures.
      updated_signatures = (known_signatures << normalized_signature).uniq.last(400)
      metadata["observation_signatures"] = updated_signatures
      metadata["observation_signatures_count"] = updated_signatures.length
      metadata["last_observation_signature"] = normalized_signature
      attrs[:metadata] = metadata
    end

    attrs[:canonical_embedding_vector] = updated_vector if person.respond_to?(:canonical_embedding_vector=)
    person.update!(attrs)
    person.sync_identity_confidence!
    { recorded: true, duplicate: false }
  end

  # Cosine similarity of two equal-length numeric vectors; 0.0 when either
  # has zero magnitude.
  def cosine_similarity(a, b)
    dot = 0.0
    mag_a = 0.0
    mag_b = 0.0

    a.each_with_index do |left, idx|
      right = b[idx].to_f
      dot += left * right
      mag_a += left * left
      mag_b += right * right
    end

    denom = Math.sqrt(mag_a) * Math.sqrt(mag_b)
    return 0.0 if denom <= 0.0

    dot / denom
  end

  # Coerces to floats and scales to unit length; [] for empty or zero input.
  def normalize(values)
    vector = Array(values).map(&:to_f)
    return [] if vector.empty?

    norm = Math.sqrt(vector.sum { |value| value * value })
    return [] if norm <= 0.0

    vector.map { |value| value / norm }
  end

  # True only on PostgreSQL with the pgvector column present; any error
  # (e.g. no connection) disables the pgvector path.
  def pgvector_enabled?
    return false unless ActiveRecord::Base.connection.adapter_name.to_s.downcase.include?("postgresql")
    pgvector_column_available?
  rescue StandardError
    false
  end

  def pgvector_column_available?
    InstagramStoryPerson.column_names.include?("canonical_embedding_vector")
  end

  # Serializes a vector as a pgvector literal, e.g. "[0.10000000,0.20000000]".
  def vector_literal(vector)
    "[" + vector.map { |value| format("%.8f", value.to_f) }.join(",") + "]"
  end

  # Trims and truncates a signature to 255 bytes; nil when blank.
  def normalize_observation_signature(value)
    token = value.to_s.strip
    return nil if token.blank?

    token.byteslice(0, 255)
  end
end
-
require "open3"
-
require "shellwords"
-
require "tempfile"
-
require "tmpdir"
-
-
# Extracts a mono 16 kHz WAV audio track from a video by shelling out to
# ffmpeg. All failures are reported via a result hash, never raised.
class VideoAudioExtractionService
  # Resolves the ffmpeg binary from the argument, FFMPEG_BIN, or a default.
  def initialize(ffmpeg_bin: nil)
    chosen = ffmpeg_bin.to_s.presence || ENV["FFMPEG_BIN"].to_s.presence || default_ffmpeg_bin
    @ffmpeg_bin = chosen.to_s
  end

  # Writes the video bytes to a tempfile, runs ffmpeg, and returns
  # { audio_bytes:, content_type:, metadata: }. On failure, audio_bytes is
  # nil and metadata carries a :reason (plus ffmpeg stderr when available).
  def extract(video_bytes:, story_id:, content_type: nil)
    return empty_result("video_bytes_missing") if video_bytes.blank?
    return empty_result("ffmpeg_missing") unless command_available?(@ffmpeg_bin)

    Tempfile.create([ "story_video_#{story_id}", extension_for(content_type: content_type) ]) do |source|
      source.binmode
      source.write(video_bytes)
      source.flush

      Dir.mktmpdir("story_audio_#{story_id}_") do |workdir|
        wav_path = File.join(workdir, "audio.wav")
        # -vn drops video; -ac 1 / -ar 16000 produce mono 16 kHz output.
        command = [
          @ffmpeg_bin, "-hide_banner", "-loglevel", "error",
          "-i", source.path,
          "-vn", "-ac", "1", "-ar", "16000", "-f", "wav",
          wav_path
        ]
        _out, err, status = Open3.capture3(*command)
        return empty_result("ffmpeg_audio_extract_failed", stderr: err.to_s) unless status.success?
        return empty_result("audio_not_found") unless File.exist?(wav_path)

        {
          audio_bytes: File.binread(wav_path),
          content_type: "audio/wav",
          metadata: {
            source: "ffmpeg"
          }
        }
      end
    end
  rescue StandardError => e
    empty_result("audio_extraction_error", stderr: e.message)
  end

  private

  # True when `command` resolves on PATH (POSIX `command -v`).
  def command_available?(command)
    system("command -v #{Shellwords.escape(command)} >/dev/null 2>&1")
  end

  # Prefer a user-local ffmpeg build when present.
  def default_ffmpeg_bin
    candidate = File.expand_path("~/.local/bin/ffmpeg")
    File.exist?(candidate) ? candidate : "ffmpeg"
  end

  # Maps a MIME type onto a tempfile extension; defaults to .mp4.
  def extension_for(content_type:)
    case content_type.to_s.downcase
    when /mp4/ then ".mp4"
    when /quicktime/ then ".mov"
    when /webm/ then ".webm"
    else ".mp4"
    end
  end

  # Builds the failure-shaped result, omitting a blank stderr.
  def empty_result(reason, stderr: nil)
    {
      audio_bytes: nil,
      content_type: nil,
      metadata: {
        source: "ffmpeg",
        reason: reason,
        stderr: stderr.to_s.presence
      }.compact
    }
  end
end
-
require "open3"
-
require "shellwords"
-
require "tempfile"
-
-
# Decides whether a video is effectively a static image (e.g. a photo with an
# audio overlay) by sampling a few frames as 32x32 grayscale and measuring
# pixel-level change between them.
class VideoFrameChangeDetectorService
  DEFAULT_SAMPLE_FRAMES = 3
  # Mean absolute per-pixel difference (0..255 scale) at or below which two
  # sampled frames are considered unchanged.
  DEFAULT_DIFF_THRESHOLD = 2.5
  GRAYSCALE_WIDTH = 32
  GRAYSCALE_HEIGHT = 32

  def initialize(
    ffmpeg_bin: nil,
    sample_frames: nil,
    diff_threshold: nil,
    video_metadata_service: VideoMetadataService.new
  )
    resolved_bin = ffmpeg_bin.to_s.presence || ENV["FFMPEG_BIN"].to_s.presence || default_ffmpeg_bin
    @ffmpeg_bin = resolved_bin.to_s
    @sample_frames = sample_frames.to_i.positive? ? sample_frames.to_i : DEFAULT_SAMPLE_FRAMES
    @diff_threshold = diff_threshold.to_f.positive? ? diff_threshold.to_f : DEFAULT_DIFF_THRESHOLD
    @video_metadata_service = video_metadata_service
  end

  # Samples frames across the video and returns a hash with :static (nil when
  # undeterminable), :processing_mode ("static_image"/"dynamic_video"), a
  # representative JPEG frame, :duration_seconds and diagnostics metadata.
  # Any error is converted into a "dynamic_video" fallback result.
  def classify(video_bytes:, reference_id:, content_type: nil)
    return empty_result(reason: "video_bytes_missing") if video_bytes.blank?
    return empty_result(reason: "ffmpeg_missing") unless command_available?(@ffmpeg_bin)

    Tempfile.create([ "video_change_detect_#{reference_id}", extension_for(content_type: content_type) ]) do |video_file|
      video_file.binmode
      video_file.write(video_bytes)
      video_file.flush

      probe = @video_metadata_service.probe(
        video_bytes: video_bytes,
        story_id: reference_id,
        content_type: content_type
      )
      duration_seconds = probe[:duration_seconds]
      timestamps = sample_timestamps(duration_seconds: duration_seconds)

      # Tiny grayscale snapshot at each timestamp; failed extractions skipped.
      samples = timestamps.filter_map do |timestamp|
        gray = extract_grayscale_frame(video_path: video_file.path, timestamp_seconds: timestamp)
        next if gray.blank?

        {
          timestamp_seconds: timestamp,
          gray_bytes: gray
        }
      end

      # Full-quality JPEG of the first usable sample, for downstream use.
      primary_timestamp = samples.first&.dig(:timestamp_seconds) || 0.0
      representative_frame = extract_jpeg_frame(video_path: video_file.path, timestamp_seconds: primary_timestamp)
      # Need at least two samples to compare anything.
      return empty_result(
        reason: "insufficient_sample_frames",
        frame_bytes: representative_frame,
        duration_seconds: duration_seconds,
        metadata: {
          sampled_timestamps: timestamps,
          sampled_frames: samples.length,
          video_probe: probe[:metadata]
        }
      ) if samples.length < 2

      diffs = compute_frame_diffs(samples: samples)
      max_diff = diffs.max.to_f
      avg_diff = (diffs.sum.to_f / diffs.length.to_f).round(4)
      # Static only if even the largest observed change stays under threshold.
      static = max_diff <= @diff_threshold

      {
        static: static,
        processing_mode: static ? "static_image" : "dynamic_video",
        frame_bytes: representative_frame,
        frame_content_type: representative_frame.present? ? "image/jpeg" : nil,
        duration_seconds: duration_seconds,
        metadata: {
          sampled_timestamps: samples.map { |row| row[:timestamp_seconds] },
          sampled_frames: samples.length,
          diff_threshold: @diff_threshold,
          max_mean_diff: max_diff.round(4),
          avg_mean_diff: avg_diff,
          frame_diffs: diffs.map { |value| value.round(4) },
          video_probe: probe[:metadata]
        }
      }
    end
  rescue StandardError => e
    empty_result(
      reason: "frame_change_detection_error",
      metadata: {
        error_class: e.class.name,
        error_message: e.message.to_s
      }
    )
  end

  private

  # Chooses sampling timestamps: start, middle, and just before the end when
  # duration is known; fixed offsets otherwise. Pads in +0.5s steps until at
  # least [@sample_frames, 2].max distinct points exist.
  def sample_timestamps(duration_seconds:)
    duration = duration_seconds.to_f
    return [ 0.0, 0.8, 1.6 ].first(@sample_frames).uniq if duration <= 0.0

    last = [ duration - 0.12, 0.0 ].max
    middle = duration / 2.0
    points = [ 0.0, middle, last ].first(@sample_frames).map { |value| value.round(3) }.uniq
    while points.length < [ @sample_frames, 2 ].max
      points << (points.last.to_f + 0.5).round(3)
      points = points.uniq
    end
    points
  end

  # Decodes one frame at `timestamp_seconds` as raw 32x32 8-bit grayscale
  # bytes via ffmpeg; nil on failure or unexpected output size.
  def extract_grayscale_frame(video_path:, timestamp_seconds:)
    cmd = [
      @ffmpeg_bin,
      "-hide_banner",
      "-loglevel",
      "error",
      "-ss",
      format("%.3f", timestamp_seconds.to_f),
      "-i",
      video_path.to_s,
      "-frames:v",
      "1",
      "-vf",
      "scale=#{GRAYSCALE_WIDTH}:#{GRAYSCALE_HEIGHT},format=gray",
      "-f",
      "rawvideo",
      "-pix_fmt",
      "gray",
      "pipe:1"
    ]
    stdout, _stderr, status = Open3.capture3(*cmd)
    return nil unless status.success?
    # Exactly one byte per pixel expected for 8-bit grayscale raw output.
    return nil unless stdout.bytesize == (GRAYSCALE_WIDTH * GRAYSCALE_HEIGHT)

    stdout
  rescue StandardError
    nil
  end

  # Decodes one frame at `timestamp_seconds` as JPEG bytes; nil on failure.
  def extract_jpeg_frame(video_path:, timestamp_seconds:)
    cmd = [
      @ffmpeg_bin,
      "-hide_banner",
      "-loglevel",
      "error",
      "-ss",
      format("%.3f", timestamp_seconds.to_f),
      "-i",
      video_path.to_s,
      "-frames:v",
      "1",
      "-q:v",
      "2",
      "-f",
      "image2pipe",
      "-vcodec",
      "mjpeg",
      "pipe:1"
    ]
    stdout, _stderr, status = Open3.capture3(*cmd)
    return nil unless status.success?
    return nil if stdout.blank?

    stdout
  rescue StandardError
    nil
  end

  # Mean-absolute diffs of each later sample against the first, plus diffs of
  # each consecutive pair.
  # NOTE(review): the first consecutive pair duplicates the first baseline
  # diff, so that value is counted twice in avg_mean_diff (max is unaffected)
  # — confirm this weighting is intended.
  def compute_frame_diffs(samples:)
    list = Array(samples)
    return [] if list.length < 2

    baseline = list.first[:gray_bytes]
    diffs = []
    list.drop(1).each do |row|
      diffs << mean_abs_diff(baseline, row[:gray_bytes])
    end
    list.each_cons(2) do |previous, current|
      diffs << mean_abs_diff(previous[:gray_bytes], current[:gray_bytes])
    end
    diffs
  end

  # Mean absolute byte difference over the overlapping prefix of two raw
  # grayscale buffers; 255.0 (maximum) when there is nothing to compare.
  def mean_abs_diff(bytes_a, bytes_b)
    a = bytes_a.to_s
    b = bytes_b.to_s
    length = [ a.bytesize, b.bytesize ].min
    return 255.0 if length <= 0

    total = 0
    length.times do |index|
      total += (a.getbyte(index).to_i - b.getbyte(index).to_i).abs
    end
    total.to_f / length.to_f
  end

  # True when `command` resolves on PATH (POSIX `command -v`).
  def command_available?(command)
    system("command -v #{Shellwords.escape(command)} >/dev/null 2>&1")
  end

  # Prefer a user-local ffmpeg build when present.
  def default_ffmpeg_bin
    local_bin = File.expand_path("~/.local/bin/ffmpeg")
    return local_bin if File.exist?(local_bin)

    "ffmpeg"
  end

  # Maps a MIME type onto a tempfile extension; defaults to .mp4.
  def extension_for(content_type:)
    value = content_type.to_s.downcase
    return ".mp4" if value.include?("mp4")
    return ".mov" if value.include?("quicktime")
    return ".webm" if value.include?("webm")

    ".mp4"
  end

  # Fallback result: undeterminable, treated as dynamic video downstream.
  def empty_result(reason:, frame_bytes: nil, duration_seconds: nil, metadata: {})
    {
      static: nil,
      processing_mode: "dynamic_video",
      frame_bytes: frame_bytes,
      frame_content_type: frame_bytes.present? ? "image/jpeg" : nil,
      duration_seconds: duration_seconds,
      metadata: {
        reason: reason
      }.merge(metadata.to_h)
    }
  end
end
-
require "open3"
-
require "shellwords"
-
require "tempfile"
-
require "tmpdir"
-
-
# Samples JPEG frames from a video at a fixed interval using ffmpeg.
class VideoFrameExtractionService
  DEFAULT_INTERVAL_SECONDS = 2.0
  DEFAULT_MAX_FRAMES = 24

  # Interval and frame cap fall back to env vars, then the defaults above.
  def initialize(ffmpeg_bin: nil, interval_seconds: nil, max_frames: nil)
    chosen = ffmpeg_bin.to_s.presence || ENV["FFMPEG_BIN"].to_s.presence || default_ffmpeg_bin
    @ffmpeg_bin = chosen.to_s
    @interval_seconds = interval_seconds.to_f.positive? ? interval_seconds.to_f : ENV.fetch("VIDEO_FRAME_INTERVAL_SECONDS", DEFAULT_INTERVAL_SECONDS).to_f
    @max_frames = max_frames.to_i.positive? ? max_frames.to_i : ENV.fetch("VIDEO_MAX_FRAMES", DEFAULT_MAX_FRAMES).to_i
  end

  # Returns { frames: [{index:, timestamp_seconds:, image_bytes:}], metadata: }.
  # On any failure returns an empty frames list with a :reason in metadata.
  def extract(video_bytes:, story_id:, content_type: nil)
    return empty_result("video_bytes_missing") if video_bytes.blank?
    return empty_result("ffmpeg_missing") unless command_available?(@ffmpeg_bin)

    Tempfile.create([ "story_video_#{story_id}", extension_for(content_type: content_type) ]) do |source|
      source.binmode
      source.write(video_bytes)
      source.flush

      Dir.mktmpdir("story_frames_#{story_id}_") do |workdir|
        pattern = File.join(workdir, "frame_%05d.jpg")
        # ffmpeg's fps filter takes a rate; clamp the interval to >= 0.2s so
        # the sampling rate stays bounded.
        fps = format("1/%.2f", [ @interval_seconds, 0.2 ].max)
        command = [ @ffmpeg_bin, "-hide_banner", "-loglevel", "error", "-i", source.path, "-vf", "fps=#{fps}", "-q:v", "2", pattern ]
        _out, err, status = Open3.capture3(*command)
        return empty_result("ffmpeg_extract_failed", stderr: err.to_s) unless status.success?

        selected = Dir[File.join(workdir, "frame_*.jpg")].sort.first(@max_frames)
        frames = selected.each_with_index.map do |path, position|
          {
            index: position,
            # Approximate timestamp assuming evenly spaced sampling.
            timestamp_seconds: (position * @interval_seconds).round(2),
            image_bytes: File.binread(path)
          }
        end

        {
          frames: frames,
          metadata: {
            source: "ffmpeg",
            interval_seconds: @interval_seconds,
            extracted_frames: selected.length
          }
        }
      end
    end
  rescue StandardError => e
    empty_result("frame_extraction_error", stderr: e.message)
  end

  private

  # True when `command` resolves on PATH (POSIX `command -v`).
  def command_available?(command)
    system("command -v #{Shellwords.escape(command)} >/dev/null 2>&1")
  end

  # Prefer a user-local ffmpeg build when present.
  def default_ffmpeg_bin
    candidate = File.expand_path("~/.local/bin/ffmpeg")
    File.exist?(candidate) ? candidate : "ffmpeg"
  end

  # Maps a MIME type onto a tempfile extension; defaults to .mp4.
  def extension_for(content_type:)
    case content_type.to_s.downcase
    when /mp4/ then ".mp4"
    when /quicktime/ then ".mov"
    when /webm/ then ".webm"
    else ".mp4"
    end
  end

  # Builds the failure-shaped result, omitting a blank stderr.
  def empty_result(reason, stderr: nil)
    {
      frames: [],
      metadata: {
        source: "ffmpeg",
        reason: reason,
        stderr: stderr.to_s.presence
      }.compact
    }
  end
end
-
require "open3"
-
require "shellwords"
-
require "tempfile"
-
require "json"
-
-
# Wraps ffprobe to report a video's duration and stream characteristics.
class VideoMetadataService
  def initialize(ffprobe_bin: ENV.fetch("FFPROBE_BIN", "ffprobe"))
    @ffprobe_bin = ffprobe_bin.to_s
  end

  # Probes the given bytes and returns { duration_seconds: Float|nil,
  # metadata: Hash }. Never raises: missing input, a missing ffprobe binary,
  # or probe failures all yield a :reason in metadata instead.
  def probe(video_bytes:, story_id:, content_type: nil)
    return { duration_seconds: nil, metadata: { reason: "video_bytes_missing" } } if video_bytes.blank?
    return { duration_seconds: nil, metadata: { reason: "ffprobe_missing" } } unless command_available?(@ffprobe_bin)

    Tempfile.create([ "story_probe_#{story_id}", extension_for(content_type: content_type) ]) do |source|
      source.binmode
      source.write(video_bytes)
      source.flush

      command = [
        @ffprobe_bin,
        "-v", "error",
        "-show_entries",
        "format=duration:stream=index,codec_type,codec_name,width,height,avg_frame_rate,channels,sample_rate",
        "-of", "json",
        source.path
      ]
      out, err, status = Open3.capture3(*command)
      unless status.success?
        return { duration_seconds: nil, metadata: { reason: "ffprobe_failed", stderr: err.to_s.presence }.compact }
      end

      parsed = JSON.parse(out.to_s.presence || "{}")
      streams = Array(parsed["streams"]).select { |row| row.is_a?(Hash) }
      # Renamed from `format` to avoid shadowing Kernel#format.
      container = parsed["format"].is_a?(Hash) ? parsed["format"] : {}
      audio = streams.find { |row| row["codec_type"].to_s == "audio" }
      video = streams.find { |row| row["codec_type"].to_s == "video" }
      duration = container["duration"].to_f

      {
        duration_seconds: duration.positive? ? duration.round(2) : nil,
        metadata: {
          source: "ffprobe",
          has_audio: audio.present?,
          audio_codec: audio&.dig("codec_name"),
          channels: audio&.dig("channels"),
          sample_rate_hz: integer_or_nil(audio&.dig("sample_rate")),
          video_codec: video&.dig("codec_name"),
          width: integer_or_nil(video&.dig("width")),
          height: integer_or_nil(video&.dig("height")),
          fps: frame_rate_to_float(video&.dig("avg_frame_rate")),
          stream_count: streams.length
        }.compact
      }
    end
  rescue StandardError => e
    { duration_seconds: nil, metadata: { reason: "video_probe_error", stderr: e.message } }
  end

  private

  # True when `command` resolves on PATH (POSIX `command -v`).
  def command_available?(command)
    system("command -v #{Shellwords.escape(command)} >/dev/null 2>&1")
  end

  # Maps a MIME type onto a tempfile extension; defaults to .mp4.
  def extension_for(content_type:)
    case content_type.to_s.downcase
    when /mp4/ then ".mp4"
    when /quicktime/ then ".mov"
    when /webm/ then ".webm"
    else ".mp4"
    end
  end

  # Positive Integer or nil (ffprobe reports numbers as strings).
  def integer_or_nil(value)
    parsed = value.to_s.to_i
    parsed.positive? ? parsed : nil
  end

  # Converts ffprobe frame-rate strings ("30000/1001" or "25") into a Float
  # rounded to 3 places, or nil when absent, zero or invalid.
  def frame_rate_to_float(value)
    text = value.to_s
    return nil if text.blank?

    if text.include?("/")
      num, den = text.split("/", 2).map(&:to_f)
      return nil if den.to_f <= 0.0

      (num / den).round(3)
    else
      rate = text.to_f
      rate.positive? ? rate.round(3) : nil
    end
  rescue StandardError
    nil
  end
end
-
require "digest"
-
require "open3"
-
require "shellwords"
-
require "tempfile"
-
-
# Grabs a single JPEG frame near the start of a video via ffmpeg for use as
# a thumbnail. Output is size-capped at MAX_THUMBNAIL_BYTES.
class VideoThumbnailService
  DEFAULT_SEEK_SECONDS = 0.2
  MAX_THUMBNAIL_BYTES = 3 * 1024 * 1024

  # Resolves the ffmpeg binary and the seek offset (defaults to 0.2s).
  def initialize(ffmpeg_bin: nil, seek_seconds: nil)
    chosen = ffmpeg_bin.to_s.presence || ENV["FFMPEG_BIN"].to_s.presence || default_ffmpeg_bin
    @ffmpeg_bin = chosen.to_s
    @seek_seconds = seek_seconds.to_f.positive? ? seek_seconds.to_f : DEFAULT_SEEK_SECONDS
  end

  # Returns { ok: true, image_bytes:, content_type:, filename:, metadata: }
  # on success, or an ok: false result with a :reason; never raises.
  def extract_first_frame(video_bytes:, reference_id:, content_type: nil)
    return empty_result("video_bytes_missing") if video_bytes.blank?
    return empty_result("ffmpeg_missing") unless command_available?(@ffmpeg_bin)

    Tempfile.create([ "video_thumb_source_#{safe_reference(reference_id)}", extension_for(content_type: content_type) ]) do |source|
      source.binmode
      source.write(video_bytes)
      source.flush

      Tempfile.create([ "video_thumb_output_#{safe_reference(reference_id)}", ".jpg" ]) do |output|
        output.binmode

        command = [
          @ffmpeg_bin,
          "-hide_banner",
          "-loglevel", "error",
          "-ss", format("%.2f", @seek_seconds),
          "-i", source.path,
          "-frames:v", "1",
          "-q:v", "3",
          "-f", "image2",
          output.path
        ]
        _out, err, status = Open3.capture3(*command)
        return empty_result("ffmpeg_extract_failed", stderr: err.to_s) unless status.success?

        jpeg = File.binread(output.path)
        return empty_result("thumbnail_missing") if jpeg.blank?
        return empty_result("thumbnail_too_large") if jpeg.bytesize > MAX_THUMBNAIL_BYTES

        # Digest only the head of the image: cheap yet stable filename.
        digest = Digest::SHA256.hexdigest("#{reference_id}:#{jpeg.byteslice(0, 128)}")
        {
          ok: true,
          image_bytes: jpeg,
          content_type: "image/jpeg",
          filename: "video_thumb_#{digest[0, 12]}.jpg",
          metadata: {
            source: "ffmpeg",
            seek_seconds: @seek_seconds,
            bytes: jpeg.bytesize
          }
        }
      end
    end
  rescue StandardError => e
    empty_result("thumbnail_extraction_error", stderr: e.message)
  end

  private

  # True when `command` resolves on PATH (POSIX `command -v`).
  def command_available?(command)
    system("command -v #{Shellwords.escape(command)} >/dev/null 2>&1")
  end

  # Sanitizes an arbitrary reference into a filesystem-safe token (<= 32
  # chars), falling back to "video" when nothing survives.
  def safe_reference(value)
    value.to_s.gsub(/[^a-zA-Z0-9_-]/, "_").first(32).presence || "video"
  end

  # Prefer a user-local ffmpeg build when present.
  def default_ffmpeg_bin
    candidate = File.expand_path("~/.local/bin/ffmpeg")
    File.exist?(candidate) ? candidate : "ffmpeg"
  end

  # Maps a MIME type onto a tempfile extension; defaults to .mp4.
  def extension_for(content_type:)
    case content_type.to_s.downcase
    when /quicktime/ then ".mov"
    when /webm/ then ".webm"
    when /mp4/ then ".mp4"
    else ".mp4"
    end
  end

  # Builds the failure-shaped result, omitting a blank stderr.
  def empty_result(reason, stderr: nil)
    {
      ok: false,
      image_bytes: nil,
      content_type: nil,
      filename: nil,
      metadata: {
        source: "ffmpeg",
        reason: reason,
        stderr: stderr.to_s.presence
      }.compact
    }
  end
end
-
require "set"
-
-
module Workspace
-
class ActionsTodoQueueService
-
DEFAULT_LIMIT = 30
-
MAX_LIMIT = 120
-
MAX_POST_AGE_DAYS = 5
-
PRELOAD_MULTIPLIER = 8
-
ENQUEUE_BATCH_SIZE = ENV.fetch("WORKSPACE_ACTIONS_ENQUEUE_BATCH_SIZE", 8).to_i.clamp(1, 30)
-
NON_PROCESSABLE_STATUSES = %w[
-
ready
-
failed
-
skipped_page_profile
-
skipped_deleted_source
-
skipped_non_user_post
-
].freeze
-
-
# Captures the queue-build context. `limit` is clamped to 1..MAX_LIMIT, and
# `enqueue_processing` is cast with ActiveModel boolean semantics so string
# params ("0", "false") behave as expected.
def initialize(account:, limit: DEFAULT_LIMIT, enqueue_processing: true, now: Time.current)
  @account = account
  @now = now
  @limit = limit.to_i.clamp(1, MAX_LIMIT)
  @enqueue_processing = ActiveModel::Type::Boolean.new.cast(enqueue_processing)
  @profile_policy_cache = {}
end
-
-
# Builds the actions TODO queue: gathers candidate posts, drops ones that
# already received a comment, sorts the remainder, optionally enqueues
# background processing, and returns the top @limit items plus stats.
def fetch!
  candidates = candidate_posts
  return empty_result if candidates.empty?

  sent_keys = commented_post_keys(profile_ids: candidates.map(&:instagram_profile_id).uniq)
  rows = candidates.filter_map { |post| build_item(post: post, sent_keys: sent_keys) }
  return empty_result if rows.empty?

  sorted = sort_items(items: rows)
  # `queued` stays nil (→ 0 via to_i) when enqueueing is disabled.
  queued = enqueue_processing_jobs(items: sorted) if @enqueue_processing

  {
    items: sorted.first(@limit),
    stats: {
      total_items: sorted.length,
      ready_items: sorted.count { |row| row[:suggestions].any? },
      processing_items: sorted.count { |row| row[:requires_processing] },
      enqueued_now: queued.to_i,
      refreshed_at: @now.iso8601(3)
    }
  }
end
-
-
private
-
-
attr_reader :account, :limit, :now
-
-
# Zeroed queue payload used when no candidates survive filtering.
def empty_result
  stats = {
    total_items: 0,
    ready_items: 0,
    processing_items: 0,
    enqueued_now: 0,
    refreshed_at: now.iso8601(3)
  }
  { items: [], stats: stats }
end
-
-
# Loads the account's most recent posts (within MAX_POST_AGE_DAYS),
# over-fetching by PRELOAD_MULTIPLIER so downstream filtering still leaves
# enough rows. Returns [] on any query error (best-effort).
def candidate_posts
  fetch_limit = [ limit * PRELOAD_MULTIPLIER, limit ].max
  oldest_allowed = MAX_POST_AGE_DAYS.days.ago

  scope = account.instagram_profile_posts
    .includes(instagram_profile: :profile_tags, media_attachment: :blob, preview_image_attachment: :blob)
    .where("taken_at >= ?", oldest_allowed)
    .order(taken_at: :desc, id: :desc)
    .limit(fetch_limit)
  scope.to_a
rescue StandardError
  []
end
-
-
def build_item(post:, sent_keys:)
-
profile = post.instagram_profile
-
return nil unless profile
-
return nil unless user_profile?(profile)
-
return nil if source_deleted_post?(post)
-
return nil unless user_created_post?(post)
-
-
comment_key = "#{post.instagram_profile_id}:#{post.shortcode}"
-
return nil if sent_keys.include?(comment_key)
-
-
analysis = post.analysis.is_a?(Hash) ? post.analysis : {}
-
metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
-
policy = metadata["comment_generation_policy"].is_a?(Hash) ? metadata["comment_generation_policy"] : {}
-
workspace_state = metadata["workspace_actions"].is_a?(Hash) ? metadata["workspace_actions"] : {}
-
suggestions = Array(analysis["comment_suggestions"]).map { |value| value.to_s.strip }.reject(&:blank?).uniq.first(3)
-
processing_status = derive_processing_status(post: post, suggestions: suggestions, workspace_state: workspace_state, policy: policy)
-
processing_message = derive_processing_message(processing_status: processing_status, workspace_state: workspace_state, policy: policy, post: post)
-
-
{
-
post: post,
-
profile: profile,
-
analysis: analysis,
-
metadata: metadata,
-
suggestions: suggestions,
-
policy: policy,
-
workspace_state: workspace_state,
-
processing_status: processing_status,
-
processing_message: processing_message,
-
requires_processing: suggestions.empty? && !NON_PROCESSABLE_STATUSES.include?(processing_status.to_s),
-
post_taken_at: post.taken_at,
-
profile_last_active_at: profile.last_active_at
-
}
-
end
-
-
def derive_processing_status(post:, suggestions:, workspace_state:, policy:)
-
return "ready" if suggestions.any?
-
-
status = workspace_state["status"].to_s
-
return status if status.present?
-
-
return "waiting_media_download" unless post.media.attached?
-
-
ai_status = post.ai_status.to_s
-
return "waiting_post_analysis" if ai_status == "pending" || ai_status == "running"
-
-
reason_code = policy["history_reason_code"].to_s
-
if policy["status"].to_s == "blocked" && reason_code.in?(WorkspaceProcessActionsTodoPostJob::PROFILE_INCOMPLETE_REASON_CODES)
-
return "waiting_build_history"
-
end
-
-
"queued_for_processing"
-
end
-
-
def derive_processing_message(processing_status:, workspace_state:, policy:, post:)
-
case processing_status.to_s
-
when "ready"
-
"Suggestions are ready."
-
when "waiting_media_download"
-
"Preview media download is queued."
-
when "waiting_post_analysis"
-
"Post analysis is running in background."
-
when "waiting_build_history", "waiting_profile_analysis"
-
"Build History is running; comment generation will resume automatically."
-
when "running"
-
"Preparing suggestions in background."
-
when "queued"
-
"Queued for background processing."
-
when "failed"
-
workspace_state["last_error"].to_s.presence || "Background processing failed. Will retry."
-
when "skipped_page_profile"
-
"Skipped because this account is classified as a page."
-
when "skipped_deleted_source"
-
"Skipped because this post was deleted from source."
-
when "skipped_non_user_post"
-
"Skipped because this row is not a user-created post."
-
else
-
if post.ai_status.to_s == "analyzed"
-
policy["blocked_reason"].to_s.presence || "Awaiting comment suggestions."
-
else
-
"Queued for analysis and suggestion generation."
-
end
-
end
-
end
-
-
def sort_items(items:)
-
status_priority = {
-
"failed" => 6,
-
"running" => 5,
-
"queued" => 4,
-
"waiting_build_history" => 4,
-
"waiting_profile_analysis" => 4,
-
"waiting_post_analysis" => 3,
-
"waiting_media_download" => 3,
-
"queued_for_processing" => 2,
-
"ready" => 1,
-
"skipped_non_user_post" => 0,
-
"skipped_deleted_source" => 0,
-
"skipped_page_profile" => 0
-
}
-
-
items.sort_by do |item|
-
[
-
status_priority[item[:processing_status].to_s].to_i,
-
item[:profile_last_active_at] || Time.at(0),
-
item[:post_taken_at] || Time.at(0)
-
]
-
end.reverse
-
end
-
-
def enqueue_processing_jobs(items:)
-
candidates = items.select { |item| item[:requires_processing] }.first(ENQUEUE_BATCH_SIZE)
-
enqueued = 0
-
-
candidates.each do |item|
-
result = WorkspaceProcessActionsTodoPostJob.enqueue_if_needed!(
-
account: account,
-
profile: item[:profile],
-
post: item[:post],
-
requested_by: "workspace_actions_queue"
-
)
-
enqueued += 1 if ActiveModel::Type::Boolean.new.cast(result[:enqueued])
-
rescue StandardError
-
next
-
end
-
-
enqueued
-
end
-
-
def user_profile?(profile)
-
decision = cached_profile_decision(profile: profile)
-
return false if ActiveModel::Type::Boolean.new.cast(decision[:skip_post_analysis])
-
-
tag_names = profile.profile_tags.map { |tag| tag.name.to_s.downcase }
-
return false if tag_names.any? { |name| %w[page brand business company publisher].include?(name) }
-
-
true
-
rescue StandardError
-
false
-
end
-
-
def cached_profile_decision(profile:)
-
@profile_policy_cache[profile.id] ||= Instagram::ProfileScanPolicy.new(profile: profile).decision
-
end
-
-
def source_deleted_post?(post)
-
metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
-
ActiveModel::Type::Boolean.new.cast(metadata["deleted_from_source"])
-
end
-
-
def user_created_post?(post)
-
metadata = post.metadata.is_a?(Hash) ? post.metadata : {}
-
post_kind = metadata["post_kind"].to_s.downcase
-
return false if post_kind == "story"
-
-
product_type = metadata["product_type"].to_s.downcase
-
return false if product_type == "story"
-
return false if ActiveModel::Type::Boolean.new.cast(metadata["is_story"])
-
-
true
-
rescue StandardError
-
false
-
end
-
-
def commented_post_keys(profile_ids:)
-
return Set.new if profile_ids.blank?
-
-
events =
-
InstagramProfileEvent
-
.joins(:instagram_profile)
-
.where(instagram_profiles: { instagram_account_id: account.id, id: profile_ids })
-
.where(kind: "post_comment_sent")
-
.order(detected_at: :desc, id: :desc)
-
.limit(2_000)
-
-
Set.new(
-
events.filter_map do |event|
-
shortcode = event.metadata.is_a?(Hash) ? event.metadata["post_shortcode"].to_s.strip : ""
-
next if shortcode.blank?
-
-
"#{event.instagram_profile_id}:#{shortcode}"
-
end
-
)
-
rescue StandardError
-
Set.new
-
end
-
end
-
end
-
#!/usr/bin/env ruby
-
-
# Story Debug Analyzer
-
# This script analyzes the captured HTML snapshots and debug data to identify story skipping issues
-
-
require 'json'
-
require 'fileutils'
-
-
# Analyzes captured story HTML snapshots and raw reel debug data to surface
# story-skipping issues (duplicate story IDs, stories wrongly marked as
# already processed) and prints a console report.
class StoryDebugAnalyzer
  # Expected debug-artifact filename shapes (named captures replace the old
  # positional match[1..4] reads, two of which were unused locals).
  SNAPSHOT_PATTERN = /\A(?<username>.+)_story_(?<index>\d+)_(?<story_id>\d+)_(?<timestamp>.+)\.html\z/
  REEL_PATTERN = /\A(?<username>.+)_reel_(?<user_id>\d+)_(?<timestamp>.+)\.json\z/
  # Marker string written into snapshots when a story was skipped as a duplicate.
  ALREADY_PROCESSED_MARKER = 'Already Processed: true'

  def initialize
    # Capture directories populated by the story-processing debug hooks.
    @debug_dir = Rails.root.join('tmp', 'story_debug_snapshots')
    @reel_debug_dir = Rails.root.join('tmp', 'story_reel_debug')
  end

  # Runs all three analysis passes and prints the combined report to stdout.
  def analyze_all
    puts "=== Story Debug Analysis ==="
    puts "Analyzing captured data at #{Time.current}"
    puts

    analyze_html_snapshots
    analyze_reel_data
    generate_summary_report
  end

  private

  # Pass 1: per-snapshot report — was the story processed or skipped, and at
  # what position in the story reel.
  def analyze_html_snapshots
    puts "--- HTML Snapshots Analysis ---"

    return unless Dir.exist?(@debug_dir)

    html_files = Dir.glob(File.join(@debug_dir, '*.html')).sort
    puts "Found #{html_files.size} HTML snapshot files"

    html_files.each do |file|
      match = File.basename(file).match(SNAPSHOT_PATTERN)
      next unless match

      puts "\n📸 #{match[:username]} - Story #{match[:index].to_i} (ID: #{match[:story_id]})"

      content = File.read(file)
      # Scan once and reuse; the old code scanned the content twice.
      already_processed = content.include?(ALREADY_PROCESSED_MARKER)
      if already_processed
        puts "  ⚠️  Story was marked as ALREADY PROCESSED"
      else
        puts "  ✅ Story was processed normally"
      end

      # Position info, e.g. "Story Index: 2 / 5" (named captures instead of $1/$2).
      if (position = content.match(%r{Story Index:\s*(?<current>\d+)\s*/\s*(?<total>\d+)}))
        current_index = position[:current].to_i
        puts "  📊 Position: #{current_index}/#{position[:total].to_i} stories"

        if current_index > 0 && already_processed
          puts "  🔍 ISSUE: Story #{current_index} was skipped but it's not the first story!"
        end
      end

      # Prior upload events in the snapshot may indicate duplicate processing.
      puts "  📝 Found previous story_upload events" if content.match?(/"kind":\s*"story_uploaded"/)
    end

    puts
  end

  # Pass 2: per-reel report from the raw JSON captures — item counts, story
  # IDs, and duplicate-ID detection.
  def analyze_reel_data
    puts "--- Raw Reel Data Analysis ---"

    return unless Dir.exist?(@reel_debug_dir)

    json_files = Dir.glob(File.join(@reel_debug_dir, '*.json')).sort
    puts "Found #{json_files.size} reel data files"

    json_files.each do |file|
      match = File.basename(file).match(REEL_PATTERN)
      next unless match

      puts "\n🎥 #{match[:username]} (User ID: #{match[:user_id]})"

      begin
        data = JSON.parse(File.read(file))

        puts "  📊 Items in reel: #{data['items_count']}"
        puts "  📊 Reels count: #{data['reels_count']}"
        puts "  📊 Reels media count: #{data['reels_media_count']}"

        # Fix: `raw` can be nil when the capture lacks 'raw_response'; the old
        # code called raw['reels'] before the &. guard and raised NoMethodError.
        raw = data['raw_response']
        reels = raw.is_a?(Hash) ? raw['reels'] : nil
        if reels.is_a?(Hash)
          reels.each do |reel_id, reel_data|
            next unless reel_data.is_a?(Hash) && reel_data['items'].is_a?(Array)

            puts "  📹 Reel #{reel_id}: #{reel_data['items'].size} items"

            # Show story IDs for debugging.
            story_ids = reel_data['items'].map { |item| item['pk'] || item['id'] }.compact
            puts "    🆔 Story IDs: #{story_ids.join(', ')}"

            # Duplicate IDs point at the root cause of wrongly-skipped stories.
            puts "    ⚠️  DUPLICATE STORY IDs DETECTED!" if story_ids.size != story_ids.uniq.size
          end
        end
      rescue JSON::ParserError => e
        puts "  ❌ Failed to parse JSON: #{e.message}"
      end
    end

    puts
  end

  # Pass 3: aggregate skip-rate summary plus recommendations.
  def generate_summary_report
    puts "--- Summary Report ---"

    total_snapshots = 0
    skipped_stories = 0

    if Dir.exist?(@debug_dir)
      html_files = Dir.glob(File.join(@debug_dir, '*.html'))
      total_snapshots = html_files.size
      skipped_stories = html_files.count { |file| File.read(file).include?(ALREADY_PROCESSED_MARKER) }
    end

    puts "📊 Total story snapshots: #{total_snapshots}"
    puts "📊 Stories skipped: #{skipped_stories}"
    puts "📊 Stories processed: #{total_snapshots - skipped_stories}"

    # skipped_stories > 0 implies total_snapshots > 0, so no division by zero.
    if skipped_stories > 0
      skip_percentage = (skipped_stories.to_f / total_snapshots * 100).round(1)
      puts "⚠️  Skip rate: #{skip_percentage}%"

      if skip_percentage > 50
        puts "🚨 HIGH skip rate detected! This indicates a potential issue with story processing logic."
      elsif skip_percentage > 25
        puts "⚠️  Elevated skip rate detected. Review the skipping logic."
      end
    end

    puts
    puts "📁 Debug files location:"
    puts "  HTML snapshots: #{@debug_dir}"
    puts "  Raw reel data: #{@reel_debug_dir}"
    puts
    puts "💡 Recommendations:"
    puts "  1. Check if stories are being incorrectly marked as duplicates"
    puts "  2. Verify story_id uniqueness in the raw reel data"
    puts "  3. Review the already_processed_story? method logic"
    puts "  4. Consider using force_analyze_all: true to bypass skipping for testing"
  end
end
-
-
# Kick off the analysis only when this file is executed directly
# (not when it is loaded via require).
StoryDebugAnalyzer.new.analyze_all if __FILE__ == $0
-
#!/usr/bin/env ruby
-
-
require "json"
-
require "uri"
-
-
# Mines Chrome DevTools performance-log captures (Network.* events) for
# Instagram story-related traffic: counts endpoints and response statuses,
# and collects story GraphQL signatures and story REST endpoints with sample
# source files. Entry point: #analyze! returns one summary Hash.
class StoryNetworkAnalyzer
  # Default location of the captured DevTools JSON logs (relative to Rails.root).
  DEBUG_GLOB = "log/instagram_debug/**/*.json".freeze

  # @param debug_glob [String] glob pattern for capture files, relative to Rails.root.
  def initialize(debug_glob: DEBUG_GLOB)
    @debug_glob = debug_glob
  end

  # Scans every capture file and aggregates network activity.
  # @return [Hash] { generated_at:, files_scanned:, top_endpoints:,
  #   story_graphql_signatures:, story_api_endpoints: }
  def analyze!
    files = Dir.glob(Rails.root.join(@debug_glob).to_s).sort

    # Global accumulators across all files. NOTE: the block-form defaults are
    # required so each key gets its own counter hash / sample array.
    endpoint_counts = Hash.new(0)
    endpoint_statuses = Hash.new { |h, k| h[k] = Hash.new(0) }
    story_graphql_counts = Hash.new(0)
    story_graphql_samples = Hash.new { |h, k| h[k] = [] }
    story_api_counts = Hash.new(0)
    story_api_samples = Hash.new { |h, k| h[k] = [] }

    files.each do |file|
      json = parse_json(File.read(file))
      next unless json.is_a?(Hash)

      logs = json["performance_logs"]
      next unless logs.is_a?(Array)

      # Per-file map requestId => {endpoint:}; used to attach response
      # statuses to the endpoint seen in the earlier request event.
      request_meta = Hash.new { |h, k| h[k] = {} }

      logs.each do |entry|
        # Each log entry wraps the CDP event as doubly-encoded JSON:
        # entry["message"] is a JSON string whose "message" key is the event.
        raw = entry.is_a?(Hash) ? entry["message"].to_s : ""
        outer = parse_json(raw)
        inner = outer.is_a?(Hash) ? outer["message"] : nil
        next unless inner.is_a?(Hash)

        method = inner["method"].to_s
        params = inner["params"].is_a?(Hash) ? inner["params"] : {}
        request_id = params["requestId"].to_s

        case method
        when "Network.requestWillBeSent"
          request = params["request"].is_a?(Hash) ? params["request"] : {}
          url = request["url"].to_s
          next if url.blank?

          endpoint = normalize_endpoint(url)
          next if endpoint.blank?

          endpoint_counts[endpoint] += 1
          request_meta[request_id][:endpoint] = endpoint if request_id.present?

          if story_api_endpoint?(endpoint)
            story_api_counts[endpoint] += 1
            add_sample(story_api_samples[endpoint], file)
          end
        when "Network.requestWillBeSentExtraInfo"
          # ExtraInfo carries raw headers incl. HTTP/2 pseudo-headers and the
          # Facebook GraphQL identification headers.
          headers = params["headers"].is_a?(Hash) ? params["headers"] : {}
          path = header_value(headers, ":path")
          friendly = header_value(headers, "x-fb-friendly-name")
          root_field = header_value(headers, "x-root-field-name")

          endpoint = normalize_endpoint(path)
          if endpoint.present?
            endpoint_counts[endpoint] += 1
            request_meta[request_id][:endpoint] = endpoint if request_id.present?
          end

          if story_graphql_signature?(friendly: friendly, root_field: root_field)
            key = [ endpoint.presence || "(unknown_path)", friendly, root_field ]
            story_graphql_counts[key] += 1
            add_sample(story_graphql_samples[key], file)
          end
        when "Network.responseReceived"
          status = params.dig("response", "status").to_i
          # Correlate back to the endpoint recorded for this requestId.
          endpoint = request_meta.dig(request_id, :endpoint)
          next if endpoint.blank?

          endpoint_statuses[endpoint][status] += 1
        end
      end
    end

    {
      generated_at: Time.current.utc.iso8601(3),
      files_scanned: files.length,
      # Top 80 endpoints by hit count, each with a status-code breakdown.
      top_endpoints: sort_hash(endpoint_counts).first(80).map do |endpoint, count|
        {
          endpoint: endpoint,
          count: count,
          statuses: sort_hash(endpoint_statuses[endpoint]).to_h
        }
      end,
      story_graphql_signatures: sort_hash(story_graphql_counts).map do |(endpoint, friendly, root_field), count|
        {
          endpoint: endpoint,
          friendly_name: friendly,
          root_field: root_field,
          count: count,
          sample_files: story_graphql_samples[[ endpoint, friendly, root_field ]]
        }
      end,
      story_api_endpoints: sort_hash(story_api_counts).map do |endpoint, count|
        {
          endpoint: endpoint,
          count: count,
          sample_files: story_api_samples[endpoint]
        }
      end
    }
  end

  private

  # Lenient JSON parse: returns nil on any failure instead of raising.
  def parse_json(raw)
    JSON.parse(raw)
  rescue StandardError
    nil
  end

  # Reduces a URL or :path header to "path?query" form; absolute URLs lose
  # their scheme/host, anything else passes through. Errors => "".
  def normalize_endpoint(value)
    raw = value.to_s.strip
    return "" if raw.blank?

    if raw.start_with?("http://", "https://")
      uri = URI.parse(raw)
      path = uri.path.to_s
      query = uri.query.to_s
      query.present? ? "#{path}?#{query}" : path
    else
      raw
    end
  rescue StandardError
    ""
  end

  # Case-tolerant header lookup: tries the key as given, lowercased, then
  # uppercased; returns "" when absent or headers is not a Hash.
  def header_value(headers, key)
    return "" unless headers.is_a?(Hash)

    headers[key].to_s.presence ||
      headers[key.downcase].to_s.presence ||
      headers[key.upcase].to_s.presence ||
      ""
  end

  # True when the GraphQL identification headers look story-related
  # (StoriesV3 queries or stories/reels root fields).
  def story_graphql_signature?(friendly:, root_field:)
    friendly_s = friendly.to_s
    root_s = root_field.to_s

    friendly_s.include?("StoriesV3") ||
      root_s.include?("__stories__") ||
      root_s.include?("__reels_") ||
      root_s.include?("__feed__reels")
  end

  # True when the endpoint path matches a known story-related REST API route.
  def story_api_endpoint?(endpoint)
    endpoint_s = endpoint.to_s

    endpoint_s.include?("/api/v1/feed/reels_media/") ||
      endpoint_s.include?("/api/v1/stories/") ||
      endpoint_s.include?("/api/v1/story_interactions/") ||
      endpoint_s.include?("/api/v1/direct_v2/threads/broadcast/reel_share/") ||
      endpoint_s.include?("/stories/")
  end

  # Records up to 3 unique sample file paths (relative to Rails.root),
  # mutating sample_array in place.
  def add_sample(sample_array, file)
    return unless sample_array.is_a?(Array)

    relative = Pathname.new(file).relative_path_from(Rails.root).to_s
    sample_array << relative unless sample_array.include?(relative)
    sample_array.slice!(3..-1) if sample_array.length > 3
  end

  # Hash => array of [key, value] pairs sorted by value descending.
  def sort_hash(hash)
    hash.to_a.sort_by { |(_, value)| -value.to_i }
  end
end